Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import logging | |
| import urllib.parse as urlparse | |
| import io | |
| import contextlib | |
| from functools import lru_cache, wraps | |
| # Add necessary imports for new tools | |
| import chess | |
| from stockfish import Stockfish | |
| from dotenv import load_dotenv | |
| from requests.exceptions import RequestException | |
| import serpapi | |
| from llama_index.core import VectorStoreIndex, download_loader | |
| from llama_index.core.schema import Document | |
| from youtube_transcript_api import YouTubeTranscriptApi, YouTubeTranscriptApiError | |
| from smolagents import CodeAgent, ToolCallingAgent, WebSearchTool, tool | |
| from smolagents import InferenceClientModel | |
| # --- Configuration and Setup --- | |
def configure_logging():
    """Configure the root logger through ``logging.basicConfig``.

    The level is set to INFO and every record is rendered as
    ``<timestamp> [<LEVEL>] <logger name>: <message>``.
    """
    log_settings = {
        "level": logging.INFO,
        "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    logging.basicConfig(**log_settings)
def load_api_keys():
    """Read the API keys this module needs from the environment.

    ``load_dotenv()`` is called first so values from a ``.env`` file are
    visible to ``os.getenv``.

    Returns:
        A dict with the keys ``'together'`` and ``'serpapi'``.

    Raises:
        ValueError: If either ``TOGETHER_API_KEY`` or ``SERPAPI_API_KEY``
            is unset or empty.
    """
    load_dotenv()
    together_key = os.getenv('TOGETHER_API_KEY')
    serpapi_key = os.getenv('SERPAPI_API_KEY')
    # Both keys are mandatory; an empty string counts as missing.
    if not together_key or not serpapi_key:
        raise ValueError("One or more API keys are missing. Please check your .env file.")
    return {
        'together': together_key,
        'serpapi': serpapi_key,
    }
| # --- Decorators --- | |
def retry(max_retries=3, initial_delay=1, backoff=2, retryable_exceptions=None):
    """Build a decorator that retries a function with exponential backoff.

    Args:
        max_retries: Total number of attempts. Values below 1 are treated
            as 1 so the wrapped function is always called at least once.
        initial_delay: Seconds to sleep after the first failed attempt.
        backoff: Factor the delay is multiplied by after each failure.
        retryable_exceptions: Exception class, or iterable of exception
            classes, that trigger a retry. When ``None`` the module
            defaults are used (see ``_default_retryable_exceptions``).

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns.

    Raises:
        The wrapped function's exception: immediately for a non-retryable
        error, or after the last attempt for a retryable one.
    """
    attempts_allowed = max(1, max_retries)

    def decorator(func):
        # wraps() keeps __name__/__doc__/signature of the original function.
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Resolved per call so late-bound module globals are picked up.
            if retryable_exceptions is None:
                retryable = _default_retryable_exceptions()
            elif isinstance(retryable_exceptions, type):
                retryable = (retryable_exceptions,)
            else:
                retryable = tuple(retryable_exceptions)

            delay = initial_delay
            for attempt in range(1, attempts_allowed + 1):
                try:
                    return func(*args, **kwargs)
                except retryable as e:
                    if attempt == attempts_allowed:
                        logging.error(
                            "%s failed after %s attempts: %s",
                            func.__name__, attempt, e,
                        )
                        raise
                    logging.warning(
                        "Attempt %s for %s failed: %s. Retrying in %s seconds...",
                        attempt, func.__name__, e, delay,
                    )
                    time.sleep(delay)
                    delay *= backoff
                except Exception as e:
                    logging.error(
                        "%s failed with a non-retryable error: %s",
                        func.__name__, e,
                    )
                    raise
        return wrapper
    return decorator


def _default_retryable_exceptions():
    """Return the exception types retried when the caller supplies none."""
    candidates = [RequestException]
    # SerpApiClientException only becomes a module global once main() has
    # run its import. Look it up leniently so wrapped functions do not fail
    # with NameError when this module is imported without running main().
    serpapi_error = globals().get("SerpApiClientException")
    if serpapi_error is not None:
        candidates.append(serpapi_error)
    candidates.append(YouTubeTranscriptApiError)
    return tuple(candidates)
| # --- Main Agent Initialization (as called by app.py) --- | |
def initialize_agent():
    """
    Initializes a multi-disciplinary agent with a toolset and reasoning framework
    designed for the benchmark's question categories.

    A ``ToolCallingAgent`` worker owns the tools (web search, webpage and
    YouTube querying, Python execution, chess analysis); a ``CodeAgent``
    manager delegates to it and is the object returned to the caller.

    Returns:
        The manager ``CodeAgent``.

    Raises:
        ValueError: Propagated from ``load_api_keys`` when a key is missing.
        Exception: Any error raised while constructing the model is logged
            and re-raised.
    """
    api_keys = load_api_keys()

    # --- Caching Layer for LlamaIndex ---
    # Indexes are memoized per agent so repeated questions about the same
    # page or video do not re-download and re-embed the content.
    @lru_cache(maxsize=16)
    def get_webpage_index(url: str) -> VectorStoreIndex:
        """Build a vector index over the content of a single webpage."""
        logging.info(f"Indexing webpage: {url}")
        loader_cls = download_loader("BeautifulSoupWebReader")
        loader = loader_cls()
        docs = loader.load_data(urls=[url])
        return VectorStoreIndex.from_documents(docs)

    @lru_cache(maxsize=16)
    def get_youtube_index(video_id: str) -> VectorStoreIndex:
        """Build a vector index over the transcript of one YouTube video."""
        logging.info(f"Indexing YouTube video: {video_id}")
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text = ' '.join(t['text'] for t in transcript)
        doc = Document(text=text, doc_id=f"youtube_{video_id}")
        return VectorStoreIndex.from_documents([doc])

    # --- Specialized Tool Definitions ---
    # Every tool is wrapped with @tool so the agent receives Tool objects;
    # the decorator reads the type hints and the "Args:" docstring section.

    # 1. Web Search Tools
    @tool
    def google_search(query: str) -> str:
        """Use for general knowledge questions, finding facts, or when you don't have a specific URL.

        Args:
            query: The search query to send to Google.
        """
        client = serpapi.Client(api_key=api_keys['serpapi'])
        results = client.search(q=query, engine="google")
        if organic_results := results.get('organic_results'):
            md = ["### Top Search Results"]
            for res in organic_results[:5]:
                md.append(f"- **{res.get('title', 'N/A')}**: {res.get('snippet', 'No snippet available.')}\n [Source]({res.get('link', '#')})")
            return "\n\n".join(md)
        return "No results found."

    @tool
    def query_webpage(url: str, query: str) -> str:
        """Use when you need to answer a specific question about the content of a single webpage URL.

        Args:
            url: The URL of the webpage to read.
            query: The question to answer from the webpage content.
        """
        try:
            index = get_webpage_index(url)
            return str(index.as_query_engine().query(query))
        except Exception as e:
            return f"Error querying webpage {url}: {e}"

    # 2. YouTube Tool
    @tool
    def query_youtube_video(video_url_or_id: str, query: str) -> str:
        """Use for questions about the content of a YouTube video. Accepts a full URL or a video ID.

        Args:
            video_url_or_id: A YouTube URL or a bare video ID.
            query: The question to answer from the video transcript.
        """
        # Bound before the try so the error messages below can always use it.
        video_id = video_url_or_id
        try:
            if "youtube.com" in video_url_or_id or "youtu.be" in video_url_or_id:
                parsed_url = urlparse.urlparse(video_url_or_id)
                video_id = urlparse.parse_qs(parsed_url.query).get('v', [None])[0]
                if not video_id:
                    # No ?v= parameter (youtu.be/<id>, /embed/<id>,
                    # /shorts/<id>): the ID is the last path segment.
                    video_id = parsed_url.path.rstrip('/').rsplit('/', 1)[-1]
            if not video_id:
                return "Error: Could not extract a valid YouTube video ID."
            index = get_youtube_index(video_id)
            return str(index.as_query_engine().query(query))
        except YouTubeTranscriptApiError as e:
            return f"Error fetching transcript for video {video_id}: {e}"
        except Exception as e:
            return f"Error querying YouTube video {video_id}: {e}"

    # 3. Coding Tool
    @tool
    def run_python_code(code: str) -> str:
        """
        Executes a string of Python code and returns its standard output.
        Use this for coding challenges, calculations, or data manipulation.
        Only text printed to standard output is returned.

        Args:
            code: The Python source code to execute.
        """
        # SECURITY: exec() with an empty globals dict is NOT a sandbox; the
        # code still gets the full builtins (open, __import__, ...) and runs
        # with this process's privileges. Run only in an isolated environment.
        output = io.StringIO()
        try:
            with contextlib.redirect_stdout(output):
                exec(code, {})
            return output.getvalue()
        except Exception as e:
            return f"Error executing code: {e}"

    # 4. Chess Tool
    @tool
    def get_chess_move(fen: str) -> str:
        """
        Finds the best chess move for a given board position in FEN format.
        Use this exclusively for chess-related questions.

        Args:
            fen: The board position in FEN notation.
        """
        # Path to stockfish can be set via env var for flexibility in HF Spaces
        stockfish_path = os.getenv("STOCKFISH_PATH", "/usr/games/stockfish")
        if not os.path.exists(stockfish_path):
            return f"Error: Stockfish engine not found at {stockfish_path}. Please set STOCKFISH_PATH environment variable."
        try:
            stockfish = Stockfish(path=stockfish_path)
            stockfish.set_fen_position(fen)
            best_move = stockfish.get_best_move()
        except Exception as e:
            return f"Error analyzing chess position: {e}"
        if best_move is None:
            # The engine reports no move; keep the declared str return type.
            return "No legal move available (the position may be checkmate or stalemate)."
        return best_move

    # --- Model and Agent Initialization ---
    try:
        model = InferenceClientModel(
            model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
            token=api_keys['together'],
            provider="together"
        )
        logging.info("Model loaded successfully.")
    except Exception as e:
        logging.error(f"Failed to load model: {e}")
        raise

    # A single, powerful worker agent with a diverse toolset
    worker_agent = ToolCallingAgent(
        tools=[
            # The manager instructions tell the model to try WebSearchTool
            # first, so the worker must actually own one.
            WebSearchTool(),
            google_search,
            query_webpage,
            query_youtube_video,
            run_python_code,
            get_chess_move,
        ],
        model=model,
        max_steps=5,  # Sub-tasks should be short and focused
        name="multi_tool_worker",
        description="A specialized worker agent that can search the web, query videos, execute code, and play chess."
    )

    # The manager agent acts as a strategic dispatcher.
    manager = CodeAgent(
        model=model,
        managed_agents=[worker_agent],
        instructions="""
You are a master AI assistant responsible for answering a user's question. Your goal is to provide a single, precise, and final answer.
**Your Strategic Thought Process for GAIA Tasks:**
1. **ANALYZE THE QUESTION (Deep Understanding):**
* Carefully read and dissect the user's question. Identify all constraints, keywords, and the exact format required for the final answer (e.g., number, string, comma-separated list, specific units).
* Determine the core task: Is it a factual lookup, data extraction, code execution, video analysis, or a chess problem?
2. **FORMULATE A DETAILED PLAN (Multi-step if needed):**
* Based on your analysis, outline a step-by-step strategy. For complex questions (Level 2/3 GAIA), this plan might involve multiple tool calls and intermediate reasoning steps.
* **Prioritize Tools:**
* For general web searches or initial broad information gathering, prefer `WebSearchTool()`. It's often quicker for a first pass.
* If `WebSearchTool()` doesn't yield precise results, or if you need structured data (e.g., AI overviews, specific facts from search results), use `google_search` (SerpApi).
* For extracting specific information from a known webpage URL, use `query_webpage`.
* For questions about YouTube video content, use `query_youtube_video`.
* For computational tasks or code generation, use `run_python_code`.
* For chess problems, use `get_chess_move`.
* Consider potential pitfalls and how to recover (e.g., if a search yields no results, try a different query).
3. **EXECUTE AND ITERATE (Tool Delegation & Synthesis):**
* Delegate tasks to the `multi_tool_worker` agent, providing the exact tool and parameters.
* Carefully evaluate the output from each tool call.
* If the output is not sufficient, refine your query or try a different tool/approach. This is where iterative refinement and self-correction are crucial.
* Synthesize information from multiple sources if necessary to build the complete answer.
4. **FORMULATE THE FINAL ANSWER (Precision & Format):**
* Once you have definitively found the answer, format it *exactly* as requested in the original question.
* **DO NOT** add any extra text, explanations, or conversational filler. The final answer must be *only* the answer itself.
* Example: If the question asks for a number and the answer is "123", your output should be `FINAL ANSWER: 123`. If it asks for a string "New York", your output should be `FINAL ANSWER: New York`. If it asks for a comma-separated list "apple,banana", your output should be `FINAL ANSWER: apple,banana`.
"""
    )
    logging.info("Multi-task agent initialized successfully.")
    return manager
| # --- Main Execution Block for Local Testing --- | |
def main():
    """Smoke-test the agent locally with one sample prompt per category.

    Logging is configured first. Any exception raised while building or
    running the agent is logged at CRITICAL level with its traceback and
    is not propagated.
    """
    # The import below binds SerpApiClientException as a module-level name.
    global SerpApiClientException
    configure_logging()
    try:
        from serpapi.client import SerpApiClientException

        agent = initialize_agent()
        if not agent:
            return

        # Example prompts for each category
        sample_prompts = {
            "Web Search": "Who is the current CEO of OpenAI?",
            "YouTube": "What is the main topic of the video https://www.youtube.com/watch?v=bZQun8Y4L2A regarding AI models?",
            "Coding": "Write a Python script that calculates and prints the factorial of 5.",
            "Chess": "What is the best move for the starting chess position? The FEN is 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'.",
        }
        for category in sample_prompts:
            prompt = sample_prompts[category]
            logging.info(f"\n--- Testing Category: {category} ---")
            logging.info(f"Prompt: {prompt}")
            answer = agent.run(prompt)
            logging.info(f"Agent's Final Answer: {answer}")
            # Separator line sized relative to the category name.
            logging.info("-" * (30 + len(category)))
    except Exception as exc:
        logging.critical(f"An unhandled error occurred during local testing: {exc}", exc_info=True)


if __name__ == "__main__":
    # Running `python agent.py` directly exercises the agent end to end.
    main()