# Repository-viewer header residue (not code): jesusgj · Modified files · 892cc72 · raw · history blame · 12.3 kB
import os
import time
import logging
import urllib.parse as urlparse
import io
import contextlib
from functools import lru_cache, wraps
# Add necessary imports for new tools
import chess
from stockfish import Stockfish
from dotenv import load_dotenv
from requests.exceptions import RequestException
import serpapi
from llama_index.core import VectorStoreIndex, download_loader
from llama_index.core.schema import Document
from youtube_transcript_api import YouTubeTranscriptApi, YouTubeTranscriptApiError
from smolagents import CodeAgent, ToolCallingAgent, WebSearchTool, tool
from smolagents import InferenceClientModel
# --- Configuration and Setup ---
def configure_logging():
    """Apply this module's root-logger configuration.

    Records at INFO level and above are formatted as
    ``<timestamp> [<LEVEL>] <logger name>: <message>``.
    """
    settings = {
        "level": logging.INFO,
        "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    logging.basicConfig(**settings)
def load_api_keys():
    """Load the API keys this module needs from the environment.

    ``load_dotenv()`` is called first, so values may also come from a
    ``.env`` file.

    Returns:
        dict: ``{'together': ..., 'serpapi': ...}`` holding the values of
        the ``TOGETHER_API_KEY`` and ``SERPAPI_API_KEY`` variables.

    Raises:
        ValueError: If any of those variables is unset or empty. The message
            names the missing variables so the problem can be fixed directly.
    """
    load_dotenv()
    # Logical key name -> environment variable that supplies it.
    env_vars = {
        'together': 'TOGETHER_API_KEY',
        'serpapi': 'SERPAPI_API_KEY',
    }
    keys = {name: os.getenv(var) for name, var in env_vars.items()}
    missing = [env_vars[name] for name, value in keys.items() if not value]
    if missing:
        raise ValueError(
            f"One or more API keys are missing ({', '.join(missing)}). "
            "Please check your .env file."
        )
    return keys
# --- Decorators ---
def retry(max_retries=3, initial_delay=1, backoff=2, exceptions=None):
    """Build a decorator that retries a call with exponential backoff.

    Args:
        max_retries: Maximum number of attempts. Values below 1 are treated
            as 1 so the wrapped function is always called at least once.
        initial_delay: Seconds to sleep after the first failed attempt.
        backoff: Factor the delay is multiplied by after every failed attempt.
        exceptions: Exception class, or iterable of exception classes, that
            should trigger a retry. When ``None`` the module defaults are
            used: ``RequestException``, ``YouTubeTranscriptApiError`` and,
            if that name is bound in module globals,
            ``SerpApiClientException``.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns. A retryable exception is re-raised once the
        attempts are exhausted; any other exception is logged and re-raised
        immediately.
    """
    def _retryable_exceptions():
        # Resolved at call time, not decoration time, because the SerpApi
        # exception class may be bound as a module global after decoration.
        if exceptions is not None:
            if isinstance(exceptions, type):
                return (exceptions,)
            return tuple(exceptions)
        defaults = [RequestException, YouTubeTranscriptApiError]
        # SerpApiClientException is not imported at module level; main()
        # injects it as a global. Looking it up defensively avoids a
        # NameError when initialize_agent() is used without main().
        serpapi_exc = globals().get("SerpApiClientException")
        if serpapi_exc is not None:
            defaults.append(serpapi_exc)
        return tuple(defaults)

    # Guarantee at least one attempt; otherwise the loop body never runs and
    # the wrapper would silently return None without calling the function.
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            delay = initial_delay
            retryable_exceptions = _retryable_exceptions()
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except retryable_exceptions as e:
                    if attempt == attempts:
                        logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
                        raise
                    logging.warning(f"Attempt {attempt} for {func.__name__} failed: {e}. Retrying in {delay} seconds...")
                    time.sleep(delay)
                    delay *= backoff
                except Exception as e:
                    logging.error(f"{func.__name__} failed with a non-retryable error: {e}")
                    raise
        return wrapper
    return decorator
# --- Main Agent Initialization (as called by app.py) ---
def _extract_youtube_video_id(video_url_or_id):
    """Return the video ID from a YouTube URL, or the input itself if it is not a URL.

    Reads the ``v`` query parameter first; otherwise falls back to the last
    path segment, which covers ``youtu.be/<id>``, ``/shorts/<id>`` and
    ``/embed/<id>`` style links. The result is empty when no ID is found.
    """
    candidate = video_url_or_id.strip()
    if "youtube.com" not in candidate and "youtu.be" not in candidate:
        return candidate
    parsed_url = urlparse.urlparse(candidate)
    video_id = urlparse.parse_qs(parsed_url.query).get('v', [None])[0]
    if not video_id:
        # Path-style links keep the ID in the final path segment.
        video_id = parsed_url.path.rstrip('/').rsplit('/', 1)[-1]
    return video_id


def initialize_agent():
    """
    Initializes a multi-disciplinary agent with a toolset and reasoning framework
    designed for the benchmark's question categories.

    A ``ToolCallingAgent`` worker owns all tools (web search, SerpApi search,
    webpage and YouTube querying, Python execution, chess) and is managed by
    a ``CodeAgent`` that receives the strategy instructions.

    Returns:
        The manager ``CodeAgent``.

    Raises:
        ValueError: Propagated from ``load_api_keys()`` when a key is missing.
        Exception: Any error raised while constructing the model is logged
            and re-raised.
    """
    api_keys = load_api_keys()

    # --- Caching Layer for LlamaIndex ---
    # Indexes are memoised per argument (up to 32 entries each) so repeated
    # questions about the same page or video do not re-download the source.
    @lru_cache(maxsize=32)
    @retry()
    def get_webpage_index(url: str) -> VectorStoreIndex:
        """Load ``url`` with BeautifulSoupWebReader and index its documents."""
        logging.info(f"Indexing webpage: {url}")
        loader_cls = download_loader("BeautifulSoupWebReader")
        loader = loader_cls()
        docs = loader.load_data(urls=[url])
        return VectorStoreIndex.from_documents(docs)

    @lru_cache(maxsize=32)
    @retry()
    def get_youtube_index(video_id: str) -> VectorStoreIndex:
        """Fetch the transcript of ``video_id`` and index it as one document."""
        logging.info(f"Indexing YouTube video: {video_id}")
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text = ' '.join(t['text'] for t in transcript)
        doc = Document(text=text, doc_id=f"youtube_{video_id}")
        return VectorStoreIndex.from_documents([doc])

    # --- Specialized Tool Definitions ---
    # NOTE: smolagents' @tool builds the tool schema from the type hints and
    # the docstring, and needs an "Args:" entry for every parameter.

    # 1. Web Search Tools
    @tool
    @retry()
    def google_search(query: str) -> str:
        """Use for general knowledge questions, finding facts, or when you don't have a specific URL.

        Args:
            query: The search query to send to Google through SerpApi.
        """
        client = serpapi.Client(api_key=api_keys['serpapi'])
        results = client.search(q=query, engine="google")
        if organic_results := results.get('organic_results'):
            md = ["### Top Search Results"]
            for res in organic_results[:5]:
                md.append(f"- **{res.get('title', 'N/A')}**: {res.get('snippet', 'No snippet available.')}\n [Source]({res.get('link', '#')})")
            return "\n\n".join(md)
        return "No results found."

    @tool
    def query_webpage(url: str, query: str) -> str:
        """Use when you need to answer a specific question about the content of a single webpage URL.

        Args:
            url: The URL of the webpage to read.
            query: The question to answer from the content of that webpage.
        """
        try:
            index = get_webpage_index(url)
            return str(index.as_query_engine().query(query))
        except Exception as e:
            # Tools report failures as text so the agent can react to them.
            return f"Error querying webpage {url}: {e}"

    # 2. YouTube Tool
    @tool
    def query_youtube_video(video_url_or_id: str, query: str) -> str:
        """Use for questions about the content of a YouTube video. Accepts a full URL or a video ID.

        Args:
            video_url_or_id: A YouTube video URL or a bare video ID.
            query: The question to answer from the video's transcript.
        """
        video_id = _extract_youtube_video_id(video_url_or_id)
        if not video_id:
            return "Error: Could not extract a valid YouTube video ID."
        try:
            index = get_youtube_index(video_id)
            return str(index.as_query_engine().query(query))
        except YouTubeTranscriptApiError as e:
            return f"Error fetching transcript for video {video_id}: {e}"
        except Exception as e:
            return f"Error querying YouTube video {video_id}: {e}"

    # 3. Coding Tool
    @tool
    def run_python_code(code: str) -> str:
        """
        Executes a string of Python code and returns its standard output.
        Use this for coding challenges, calculations, or data manipulation.
        The code is executed in a restricted environment; it cannot access external files.

        Args:
            code: The Python source code to execute.
        """
        # SECURITY: exec() runs model-generated code in this process. An empty
        # globals dict is NOT a sandbox: Python inserts __builtins__ into it,
        # so imports, file access and network access all remain possible.
        # The restriction described to the model above is not enforced here.
        # TODO(review): run this in an isolated interpreter/process.
        output = io.StringIO()
        try:
            with contextlib.redirect_stdout(output):
                exec(code, {})
            return output.getvalue()
        except Exception as e:
            return f"Error executing code: {e}"

    # 4. Chess Tool
    @tool
    def get_chess_move(fen: str) -> str:
        """
        Finds the best chess move for a given board position in FEN format.
        Use this exclusively for chess-related questions.

        Args:
            fen: The board position in Forsyth-Edwards Notation (FEN).
        """
        # Path to stockfish can be set via env var for flexibility in HF Spaces
        stockfish_path = os.getenv("STOCKFISH_PATH", "/usr/games/stockfish")
        if not os.path.exists(stockfish_path):
            return f"Error: Stockfish engine not found at {stockfish_path}. Please set STOCKFISH_PATH environment variable."
        try:
            stockfish = Stockfish(path=stockfish_path)
            stockfish.set_fen_position(fen)
            best_move = stockfish.get_best_move()
        except Exception as e:
            return f"Error analyzing chess position: {e}"
        if best_move is None:
            # The engine reports no move at all (e.g. the side to move is
            # already mated); keep the declared ``str`` return type.
            return "No legal move available for this position (the game may already be over)."
        return best_move

    # --- Model and Agent Initialization ---
    try:
        model = InferenceClientModel(
            model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
            token=api_keys['together'],
            provider="together"
        )
        logging.info("Model loaded successfully.")
    except Exception as e:
        logging.error(f"Failed to load model: {e}")
        raise

    # A single, powerful worker agent with a diverse toolset
    worker_agent = ToolCallingAgent(
        tools=[
            # The manager instructions tell the agent to prefer the general
            # web search tool first, so the worker must actually have it.
            WebSearchTool(),
            google_search,
            query_webpage,
            query_youtube_video,
            run_python_code,
            get_chess_move,
        ],
        model=model,
        max_steps=5,  # Sub-tasks should be short and focused
        name="multi_tool_worker",
        description="A specialized worker agent that can search the web, query videos, execute code, and play chess."
    )

    # The manager agent acts as a strategic dispatcher.
    manager = CodeAgent(
        # ``tools`` is a required argument; the manager has no tools of its
        # own and delegates everything to the managed worker.
        tools=[],
        model=model,
        managed_agents=[worker_agent],
        instructions="""
You are a master AI assistant responsible for answering a user's question. Your goal is to provide a single, precise, and final answer.
**Your Strategic Thought Process for GAIA Tasks:**
1. **ANALYZE THE QUESTION (Deep Understanding):**
* Carefully read and dissect the user's question. Identify all constraints, keywords, and the exact format required for the final answer (e.g., number, string, comma-separated list, specific units).
* Determine the core task: Is it a factual lookup, data extraction, code execution, video analysis, or a chess problem?
2. **FORMULATE A DETAILED PLAN (Multi-step if needed):**
* Based on your analysis, outline a step-by-step strategy. For complex questions (Level 2/3 GAIA), this plan might involve multiple tool calls and intermediate reasoning steps.
* **Prioritize Tools:**
* For general web searches or initial broad information gathering, prefer `WebSearchTool()`. It's often quicker for a first pass.
* If `WebSearchTool()` doesn't yield precise results, or if you need structured data (e.g., AI overviews, specific facts from search results), use `google_search` (SerpApi).
* For extracting specific information from a known webpage URL, use `query_webpage`.
* For questions about YouTube video content, use `query_youtube_video`.
* For computational tasks or code generation, use `run_python_code`.
* For chess problems, use `get_chess_move`.
* Consider potential pitfalls and how to recover (e.g., if a search yields no results, try a different query).
3. **EXECUTE AND ITERATE (Tool Delegation & Synthesis):**
* Delegate tasks to the `multi_tool_worker` agent, providing the exact tool and parameters.
* Carefully evaluate the output from each tool call.
* If the output is not sufficient, refine your query or try a different tool/approach. This is where iterative refinement and self-correction are crucial.
* Synthesize information from multiple sources if necessary to build the complete answer.
4. **FORMULATE THE FINAL ANSWER (Precision & Format):**
* Once you have definitively found the answer, format it *exactly* as requested in the original question.
* **DO NOT** add any extra text, explanations, or conversational filler. The final answer must be *only* the answer itself.
* Example: If the question asks for a number and the answer is "123", your output should be `FINAL ANSWER: 123`. If it asks for a string "New York", your output should be `FINAL ANSWER: New York`. If it asks for a comma-separated list "apple,banana", your output should be `FINAL ANSWER: apple,banana`.
"""
    )
    logging.info("Multi-task agent initialized successfully.")
    return manager
# --- Main Execution Block for Local Testing ---
def main():
    """Smoke-test the agent locally with one sample prompt per tool category.

    Logging is configured first. Any exception raised while importing,
    building the agent or running a prompt is logged at CRITICAL level with
    its traceback instead of being propagated.
    """
    configure_logging()
    try:
        # Bind the SerpApi exception class as a module-level global; the
        # retry decorator refers to it by bare name at call time.
        global SerpApiClientException
        from serpapi.client import SerpApiClientException

        agent = initialize_agent()
        if not agent:
            return

        # Example prompts for each category
        sample_prompts = {
            "Web Search": "Who is the current CEO of OpenAI?",
            "YouTube": "What is the main topic of the video https://www.youtube.com/watch?v=bZQun8Y4L2A regarding AI models?",
            "Coding": "Write a Python script that calculates and prints the factorial of 5.",
            "Chess": "What is the best move for the starting chess position? The FEN is 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'.",
        }
        for category in sample_prompts:
            prompt = sample_prompts[category]
            logging.info(f"\n--- Testing Category: {category} ---")
            logging.info(f"Prompt: {prompt}")
            response = agent.run(prompt)
            logging.info(f"Agent's Final Answer: {response}")
            separator_width = 30 + len(category)
            logging.info("-" * separator_width)
    except Exception as exc:
        logging.critical(f"An unhandled error occurred during local testing: {exc}", exc_info=True)


if __name__ == "__main__":
    # Running this file directly (e.g. `python agent.py`) exercises the
    # agent's logic locally; importing it does not.
    main()