jesusgj committed on
Commit
19b4b62
·
1 Parent(s): 871e0bb

Modified files

Files changed (2)
  1. agent.py +141 -1786
  2. requirements.txt +1 -0
agent.py CHANGED
@@ -8,17 +8,13 @@ import re
8
  import json
9
  from functools import lru_cache, wraps
10
  from typing import Optional, Dict, Any, List
11
- from datetime import datetime, timedelta
12
 
13
  from dotenv import load_dotenv
14
  from requests.exceptions import RequestException
15
- import requests
16
  import serpapi
17
  from llama_index.core import VectorStoreIndex, download_loader
18
  from llama_index.core.schema import Document
19
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
20
- import pandas as pd
21
- import numpy as np
22
 
23
  # --- Correctly import the specific tools from smolagents ---
24
  from smolagents import (
@@ -39,16 +35,13 @@ def configure_logging():
39
  datefmt="%Y-%m-%d %H:%M:%S"
40
  )
41
 
42
- def load_api_keys():
43
- """Loads API keys from environment variables with fallback."""
44
  load_dotenv()
45
  keys = {
46
  'together': os.getenv('TOGETHER_API_KEY'),
47
  'serpapi': os.getenv('SERPAPI_API_KEY'),
48
- 'hf_token': os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACE_HUB_TOKEN'),
49
  }
50
-
51
- # Log which keys are available (without revealing the actual keys)
52
  for key_name, key_value in keys.items():
53
  if key_value:
54
  logging.info(f"βœ… {key_name.upper()} API key loaded")
@@ -56,1890 +49,252 @@ def load_api_keys():
56
  logging.warning(f"⚠️ {key_name.upper()} API key not found")
57
 
58
  if not keys['together']:
59
- raise ValueError("TOGETHER_API_KEY is required but not found in environment variables.")
60
-
61
  return keys
62
 
63
  # --- Custom Exceptions ---
64
- class SerpApiClientException(Exception):
65
- pass
66
-
67
- class YouTubeTranscriptApiError(Exception):
68
- pass
69
-
70
- class DataProcessingError(Exception):
71
- pass
72
 
73
  # --- Enhanced Decorators ---
74
 
75
  def retry(max_retries=3, initial_delay=1, backoff=2):
76
- """A robust retry decorator with exponential backoff and better error handling."""
77
  def decorator(func):
78
  @wraps(func)
79
  def wrapper(*args, **kwargs):
80
  delay = initial_delay
81
- retryable_exceptions = (
82
- RequestException,
83
- SerpApiClientException,
84
- YouTubeTranscriptApiError,
85
- TranscriptsDisabled,
86
- NoTranscriptFound,
87
- ConnectionError,
88
- TimeoutError
89
- )
90
- last_exception = None
91
-
92
  for attempt in range(1, max_retries + 1):
93
  try:
94
  return func(*args, **kwargs)
95
  except retryable_exceptions as e:
96
- last_exception = e
97
  if attempt == max_retries:
98
  logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
99
- break
 
100
  logging.warning(f"Attempt {attempt} for {func.__name__} failed: {e}. Retrying in {delay} seconds...")
101
  time.sleep(delay)
102
  delay *= backoff
103
  except Exception as e:
104
  logging.error(f"{func.__name__} failed with a non-retryable error: {e}")
105
- raise
106
-
107
- # If we get here, all retries failed
108
- return f"Error after {max_retries} attempts: {last_exception}"
109
  return wrapper
110
  return decorator
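Note that the kept `except retryable_exceptions as e:` line still references the `retryable_exceptions` tuple this hunk removes, so the new file must redefine it. A minimal self-contained sketch of the trimmed decorator, assuming only request/connection failures remain retryable:

```python
import logging
import time
from functools import wraps

from requests.exceptions import RequestException


def retry(max_retries=3, initial_delay=1, backoff=2):
    """Retry with exponential backoff; non-retryable errors propagate immediately."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            delay = initial_delay
            # Assumed retryable set; the removed version also listed SerpApi/YouTube errors.
            retryable_exceptions = (RequestException, ConnectionError, TimeoutError)
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except retryable_exceptions as e:
                    if attempt == max_retries:
                        logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
                        raise
                    logging.warning(f"Attempt {attempt} failed: {e}. Retrying in {delay}s...")
                    time.sleep(delay)
                    delay *= backoff
        return wrapper
    return decorator
```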
111
 
112
  # --- Enhanced Helper Functions ---
113
 
114
  def extract_video_id(url_or_id: str) -> Optional[str]:
115
- """Extract YouTube video ID from various URL formats with better validation."""
116
- if not url_or_id:
117
- return None
118
-
119
- # Clean the input
120
  url_or_id = url_or_id.strip()
121
-
122
- # Check if it's already a video ID
123
  if re.match(r'^[a-zA-Z0-9_-]{11}$', url_or_id):
124
  return url_or_id
125
-
126
- # Various YouTube URL patterns
127
  patterns = [
128
- r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})',
129
- r'youtube\.com/.*[?&]v=([a-zA-Z0-9_-]{11})',
130
- r'youtube-nocookie\.com/embed/([a-zA-Z0-9_-]{11})',
131
  ]
132
-
133
  for pattern in patterns:
134
  match = re.search(pattern, url_or_id)
135
  if match:
136
  return match.group(1)
137
-
138
  return None
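Behaviour sketch for the extractor, exercising the URL shapes handled by the removed patterns (the 11-character ID is a hypothetical example):

```python
assert extract_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=42s") == "dQw4w9WgXcQ"
assert extract_video_id("dQw4w9WgXcQ") == "dQw4w9WgXcQ"   # bare video ID
assert extract_video_id("not a video") is None
```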
139
 
140
  def clean_text_output(text: str) -> str:
141
- """Clean and normalize text output for better processing."""
142
- if not text:
143
- return ""
144
-
145
- # Remove excessive whitespace
146
- text = re.sub(r'\s+', ' ', text.strip())
147
-
148
- # Remove common prefixes that might interfere with answer extraction
149
- prefixes_to_remove = [
150
- "Based on the search results,",
151
- "According to the information,",
152
- "The answer is:",
153
- "Result:",
154
- ]
155
-
156
- for prefix in prefixes_to_remove:
157
- if text.lower().startswith(prefix.lower()):
158
- text = text[len(prefix):].strip()
159
-
160
  return text
161
 
162
- def extract_numerical_answer(text: str) -> Optional[str]:
163
- """Extract numerical answers from text with better precision."""
164
- # Look for standalone numbers
165
- number_patterns = [
166
- r'\b(\d+\.?\d*)\b', # Decimal numbers
167
- r'\b(\d+/\d+)\b', # Fractions
168
- r'\b(\d+,\d+(?:,\d+)*)\b', # Numbers with commas
169
- ]
170
-
171
- for pattern in number_patterns:
172
- matches = re.findall(pattern, text)
173
- if matches:
174
- return matches[-1] # Return the last match (often the final answer)
175
-
176
- return None
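The removed helper deliberately returned the last match, on the assumption that the final number in a model response is the answer:

```python
print(extract_numerical_answer("Step 1 gives 12, so the result is 455"))  # -> 455
print(extract_numerical_answer("no digits here"))                         # -> None
```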
177
-
178
- # --- Answer Extraction Functions ---
179
 
180
  def extract_final_answer(response: str) -> str:
181
- """
182
- Extract the final answer from agent response following GAIA format requirements.
183
- Looks for 'FINAL ANSWER:' pattern and extracts the answer after it.
184
- """
185
- if not response:
186
- return ""
187
-
188
- # Look for FINAL ANSWER pattern (case insensitive)
189
- final_answer_pattern = re.compile(r'FINAL\s+ANSWER\s*:\s*(.+?)(?:\n|$)', re.IGNORECASE | re.DOTALL)
190
- match = final_answer_pattern.search(response)
191
-
192
  if match:
193
- answer = match.group(1).strip()
194
- # Clean up common formatting issues
195
- answer = re.sub(r'\s+', ' ', answer) # Normalize whitespace
196
- answer = answer.rstrip('.') # Remove trailing periods
197
- return answer
198
 
199
- # Fallback: if no FINAL ANSWER found, try to extract from end of response
200
  lines = response.strip().split('\n')
201
- if lines:
202
- last_line = lines[-1].strip()
203
- # Remove common prefixes
204
- for prefix in ['Answer:', 'Result:', 'The answer is:', 'Final result:']:
205
- if last_line.lower().startswith(prefix.lower()):
206
- return last_line[len(prefix):].strip()
207
- return last_line
208
-
209
- return response.strip()
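The two extraction paths being removed, illustrated:

```python
# Marker path: the regex captures everything after "FINAL ANSWER:".
print(extract_final_answer("reasoning...\nFINAL ANSWER: Paris, London, Berlin"))
# -> Paris, London, Berlin

# Fallback path: no marker, so the last line is used and its prefix stripped.
print(extract_final_answer("some work\nAnswer: 1991"))
# -> 1991
```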
210
 
211
- def normalize_answer_format(answer: str, expected_type: str = "auto") -> str:
212
- """
213
- Normalize answer format according to GAIA requirements.
214
- Args:
215
- answer: The extracted answer
216
- expected_type: "number", "string", "list", or "auto" to detect
217
- """
218
- if not answer:
219
- return answer
220
-
221
- answer = answer.strip()
222
 
223
- # Auto-detect type if not specified
224
- if expected_type == "auto":
225
- # Try to detect a list (comma-separated, at least two elements)
226
- if ',' in answer and len([x for x in answer.split(',') if x.strip()]) > 1:
227
- expected_type = "list"
228
- # Try to detect a number (integer or float, possibly negative)
229
- elif re.fullmatch(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?', answer.replace(',', '').strip()):
230
- expected_type = "number"
231
- # Otherwise, treat as string
232
- else:
233
- expected_type = "string"
234
-
235
-
236
- def initialize_agent():
237
- """
238
- Initializes an enhanced multi-disciplinary agent optimized for GAIA benchmark questions.
239
- """
240
- configure_logging()
241
- logging.info("πŸš€ Starting GAIA agent initialization...")
242
-
243
- try:
244
- api_keys = load_api_keys()
245
- except Exception as e:
246
- logging.error(f"Failed to load API keys: {e}")
247
- return None
248
-
249
- # --- Enhanced Caching Layer for LlamaIndex ---
250
- @lru_cache(maxsize=64) # Increased cache size
251
- @retry(max_retries=3)
252
- def get_webpage_index(url: str) -> VectorStoreIndex:
253
- logging.info(f"πŸ“„ Indexing webpage: {url}")
254
- try:
255
- loader_cls = download_loader("BeautifulSoupWebReader")
256
- loader = loader_cls()
257
- docs = loader.load_data(urls=[url])
258
- if not docs:
259
- raise ValueError(f"No content could be extracted from {url}")
260
-
261
- # Filter out very short documents
262
- valid_docs = [doc for doc in docs if len(doc.text.strip()) > 50]
263
- if not valid_docs:
264
- raise ValueError(f"No substantial content found in {url}")
265
-
266
- return VectorStoreIndex.from_documents(valid_docs)
267
- except Exception as e:
268
- logging.error(f"Error indexing webpage {url}: {e}")
269
- raise
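On the decorator stacking removed here: `lru_cache` wraps the retried function, so failures raise through uncached and only successfully built indexes are memoised. A toy demonstration, assuming the `retry` decorator above:

```python
from functools import lru_cache

calls = 0

@lru_cache(maxsize=64)
@retry(max_retries=3)
def fake_index(url: str) -> str:
    global calls
    calls += 1
    return f"index-for:{url}"

fake_index("https://example.com")
fake_index("https://example.com")
print(calls)  # -> 1: the second call never reaches the loader
```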
270
-
271
- @lru_cache(maxsize=32)
272
- @retry(max_retries=3)
273
- def get_youtube_index(video_id: str) -> VectorStoreIndex:
274
- logging.info(f"πŸŽ₯ Indexing YouTube video: {video_id}")
275
- try:
276
- # Try to get English transcript first
277
- try:
278
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
279
- except (TranscriptsDisabled, NoTranscriptFound):
280
- # Try auto-generated or any available transcript
281
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
282
- try:
283
- transcript = transcript_list.find_transcript(['en']).fetch()
284
- except:
285
- # Get any available transcript
286
- available_transcripts = list(transcript_list)
287
- if not available_transcripts:
288
- raise YouTubeTranscriptApiError(f"No transcripts available for video {video_id}")
289
- transcript = available_transcripts[0].fetch()
290
-
291
- if not transcript:
292
- raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
293
-
294
- # Combine transcript with timestamps for better context
295
- text_segments = []
296
- for entry in transcript:
297
- timestamp = int(entry.get('start', 0))
298
- text = entry.get('text', '').strip()
299
- if text:
300
- text_segments.append(f"[{timestamp}s] {text}")
301
-
302
- full_text = ' '.join(text_segments)
303
- if not full_text.strip():
304
- raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
305
-
306
- doc = Document(
307
- text=full_text,
308
- doc_id=f"youtube_{video_id}",
309
- metadata={"source": f"https://youtube.com/watch?v={video_id}"}
310
- )
311
- return VectorStoreIndex.from_documents([doc])
312
-
313
- except Exception as e:
314
- logging.error(f"Error indexing YouTube video {video_id}: {e}")
315
- raise
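A condensed sketch of the transcript path being removed, keeping the timestamped-segment format; it assumes the pre-1.0 `youtube_transcript_api` interface this file imports (`get_transcript` returning dicts with `start`/`text` keys):

```python
from youtube_transcript_api import YouTubeTranscriptApi

def transcript_with_timestamps(video_id: str) -> str:
    entries = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
    segments = [
        f"[{int(e.get('start', 0))}s] {e['text'].strip()}"
        for e in entries
        if e.get('text', '').strip()
    ]
    return ' '.join(segments)
```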
316
-
317
- # --- Enhanced Tool Definitions ---
318
-
319
- @tool
320
- def advanced_web_query(url: str, query: str) -> str:
321
- """
322
- Extract specific information from a webpage using advanced querying.
323
- Handles various content types and provides detailed responses.
324
- Args:
325
- url: The webpage URL to analyze
326
- query: Specific question to ask about the content
327
- """
328
- try:
329
- if not url.startswith(('http://', 'https://')):
330
- url = 'https://' + url
331
-
332
- logging.info(f"πŸ” Querying webpage: {url} with query: {query}")
333
- index = get_webpage_index(url)
334
- query_engine = index.as_query_engine(
335
- similarity_top_k=8, # Increased for better coverage
336
- response_mode="tree_summarize",
337
- verbose=True
338
- )
339
-
340
- response = query_engine.query(query)
341
- result = clean_text_output(str(response))
342
-
343
- # If the response seems incomplete, try a broader query
344
- if len(result) < 50 and "not found" not in result.lower():
345
- broader_query = f"Information about {query.split()[-1] if query.split() else query}"
346
- broader_response = query_engine.query(broader_query)
347
- broader_result = clean_text_output(str(broader_response))
348
- if len(broader_result) > len(result):
349
- result = broader_result
350
-
351
- return result
352
-
353
- except Exception as e:
354
- error_msg = f"Error querying webpage {url}: {e}"
355
- logging.error(error_msg)
356
- return error_msg
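The query path reduces to standard LlamaIndex calls already used above; a stripped-down sketch (URL hypothetical):

```python
index = get_webpage_index("https://en.wikipedia.org/wiki/Python_(programming_language)")
engine = index.as_query_engine(similarity_top_k=8, response_mode="tree_summarize")
print(clean_text_output(str(engine.query("When was Python first released?"))))
```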
357
-
358
- @tool
359
- def enhanced_youtube_query(video_url_or_id: str, query: str) -> str:
360
- """
361
- Extract information from YouTube video transcripts with enhanced processing.
362
- Handles timestamps and provides contextual responses.
363
- Args:
364
- video_url_or_id: YouTube URL or video ID
365
- query: Specific question about the video content
366
- """
367
- try:
368
- video_id = extract_video_id(video_url_or_id)
369
- if not video_id:
370
- return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
371
-
372
- logging.info(f"🎬 Querying YouTube video: {video_id} with query: {query}")
373
- index = get_youtube_index(video_id)
374
- query_engine = index.as_query_engine(
375
- similarity_top_k=6,
376
- response_mode="tree_summarize",
377
- verbose=True
378
- )
379
-
380
- response = query_engine.query(query)
381
- result = clean_text_output(str(response))
382
-
383
- return result
384
-
385
- except YouTubeTranscriptApiError as e:
386
- error_msg = f"YouTube transcript error for {video_url_or_id}: {e}"
387
- logging.error(error_msg)
388
- return error_msg
389
- except Exception as e:
390
- error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
391
- logging.error(error_msg)
392
- return error_msg
393
-
394
- @tool
395
- def enhanced_python_execution(code: str) -> str:
396
- """
397
- Execute Python code with enhanced capabilities and error handling.
398
- Includes mathematical, data processing, and web scraping capabilities.
399
- Args:
400
- code: Python code to execute
401
- """
402
- # Expanded safe globals with more libraries
403
- safe_globals = {}
404
- try:
405
- # Basic Python modules
406
- import math, datetime, json, re, collections, itertools, random
407
- from fractions import Fraction
408
- from decimal import Decimal
409
- import statistics
410
-
411
- safe_globals.update({
412
- 'math': math, 'datetime': datetime, 'json': json, 're': re,
413
- 'collections': collections, 'itertools': itertools, 'random': random,
414
- 'Fraction': Fraction, 'Decimal': Decimal, 'statistics': statistics
415
- })
416
-
417
- # Scientific computing
418
- try:
419
- import numpy as np
420
- safe_globals['np'] = np
421
- safe_globals['numpy'] = np
422
- except ImportError:
423
- logging.warning("NumPy not available")
424
-
425
- try:
426
- import pandas as pd
427
- safe_globals['pd'] = pd
428
- safe_globals['pandas'] = pd
429
- except ImportError:
430
- logging.warning("Pandas not available")
431
-
432
- # Web requests for data fetching
433
- try:
434
- import requests
435
- safe_globals['requests'] = requests
436
- except ImportError:
437
- logging.warning("Requests not available")
438
-
439
- except ImportError as e:
440
- logging.warning(f"Some modules not available: {e}")
441
-
442
- # Capture both stdout and stderr
443
- stdout_capture = io.StringIO()
444
- stderr_capture = io.StringIO()
445
-
446
- try:
447
- logging.info(f"🐍 Executing Python code: {code[:100]}...")
448
-
449
- with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
450
- # Use exec with restricted builtins for safety
451
- restricted_builtins = {
452
- 'abs': abs, 'all': all, 'any': any, 'bin': bin, 'bool': bool,
453
- 'chr': chr, 'dict': dict, 'dir': dir, 'divmod': divmod,
454
- 'enumerate': enumerate, 'filter': filter, 'float': float,
455
- 'format': format, 'hex': hex, 'int': int, 'len': len,
456
- 'list': list, 'map': map, 'max': max, 'min': min, 'oct': oct,
457
- 'ord': ord, 'pow': pow, 'print': print, 'range': range,
458
- 'repr': repr, 'reversed': reversed, 'round': round,
459
- 'set': set, 'sorted': sorted, 'str': str, 'sum': sum,
460
- 'tuple': tuple, 'type': type, 'zip': zip,
461
- }
462
-
463
- exec(code, {"__builtins__": restricted_builtins}, safe_globals)
464
-
465
- stdout_result = stdout_capture.getvalue()
466
- stderr_result = stderr_capture.getvalue()
467
-
468
- # Combine outputs
469
- result_parts = []
470
- if stdout_result.strip():
471
- result_parts.append(stdout_result.strip())
472
- if stderr_result.strip():
473
- result_parts.append(f"Warnings/Errors: {stderr_result.strip()}")
474
-
475
- if result_parts:
476
- return '\n'.join(result_parts)
477
- else:
478
- return "Code executed successfully (no output)"
479
-
480
- except Exception as e:
481
- error_msg = f"Code execution error: {e}"
482
- stderr_result = stderr_capture.getvalue()
483
- if stderr_result.strip():
484
- error_msg += f"\nAdditional details: {stderr_result.strip()}"
485
- logging.error(error_msg)
486
- return error_msg
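The sandbox pattern here, compressed: `exec` with a whitelisted `__builtins__` mapping and redirected streams. Worth noting that `exec`-level restriction is best-effort, not a hard security boundary. A minimal sketch:

```python
import contextlib
import io

def run_snippet(code: str) -> str:
    # Whitelist only the builtins the snippet may use.
    allowed = {'print': print, 'range': range, 'len': len, 'sum': sum, 'min': min, 'max': max}
    out = io.StringIO()
    with contextlib.redirect_stdout(out):
        exec(code, {"__builtins__": allowed}, {})
    return out.getvalue().strip() or "Code executed successfully (no output)"

print(run_snippet("print(sum(range(10)))"))  # -> 45
```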
487
-
488
- @tool
489
- def enhanced_wikipedia_search(query: str, detailed: bool = True) -> str:
490
- """
491
- Search Wikipedia with enhanced content extraction and error handling.
492
- Args:
493
- query: Search term
494
- detailed: Whether to return detailed information or just summary
495
- """
496
- try:
497
- import wikipedia
498
- wikipedia.set_lang("en")
499
- wikipedia.set_rate_limiting(True)
500
-
501
- logging.info(f"πŸ“š Searching Wikipedia for: {query}")
502
-
503
- # Handle disambiguation and search suggestions
504
- try:
505
- page = wikipedia.page(query, auto_suggest=True)
506
- except wikipedia.DisambiguationError as e:
507
- # Take the first option from disambiguation
508
- if e.options:
509
- page = wikipedia.page(e.options[0])
510
- else:
511
- return f"Wikipedia disambiguation error for '{query}': {e}"
512
- except wikipedia.PageError:
513
- # Try searching if direct page lookup fails
514
- search_results = wikipedia.search(query, results=3)
515
- if search_results:
516
- page = wikipedia.page(search_results[0])
517
- else:
518
- return f"No Wikipedia results found for '{query}'"
519
-
520
- if detailed:
521
- # Get more comprehensive content
522
- content_sections = []
523
- content_sections.append(f"**{page.title}**")
524
- content_sections.append(f"Summary: {page.summary}")
525
-
526
- # Add first few sections if available
527
- if hasattr(page, 'content') and page.content:
528
- sections = page.content.split('\n\n')[:3] # First 3 paragraphs
529
- for section in sections:
530
- if section.strip() and len(section) > 50:
531
- content_sections.append(section.strip())
532
-
533
- content_sections.append(f"Source: {page.url}")
534
- return '\n\n'.join(content_sections)
535
- else:
536
- return f"**{page.title}**\n\n{page.summary}\n\nSource: {page.url}"
537
-
538
- except ImportError:
539
- return "Wikipedia library not installed. Cannot perform search."
540
- except Exception as e:
541
- error_msg = f"Wikipedia search error for '{query}': {e}"
542
- logging.error(error_msg)
543
- return error_msg
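The lookup-with-fallback chain, reduced to its skeleton using the same `wikipedia` package calls as above:

```python
import wikipedia

def lookup_summary(query: str) -> str:
    wikipedia.set_lang("en")
    try:
        page = wikipedia.page(query, auto_suggest=True)
    except wikipedia.DisambiguationError as e:
        page = wikipedia.page(e.options[0])   # first disambiguation option
    except wikipedia.PageError:
        results = wikipedia.search(query, results=3)
        if not results:
            return f"No Wikipedia results found for '{query}'"
        page = wikipedia.page(results[0])
    return f"{page.title}: {page.summary}"
```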
544
-
545
- @tool
546
- def data_processing_tool(data_description: str, operation: str) -> str:
547
- """
548
- Process and analyze data based on descriptions and operations.
549
- Useful for mathematical calculations, data analysis, and structured data processing.
550
- Args:
551
- data_description: Description of the data or data source
552
- operation: The operation to perform (calculate, analyze, extract, etc.)
553
- """
554
- try:
555
- logging.info(f"πŸ“Š Processing data: {data_description} | Operation: {operation}")
556
-
557
- # This tool is designed to work with the Python execution tool
558
- # for complex data processing tasks
559
- code_template = f"""
560
- # Data processing task: {operation}
561
- # Data description: {data_description}
562
-
563
- # Add your specific data processing logic here
564
- # This is a template - specific implementation depends on the data and operation
565
-
566
- print("Data processing task initiated")
567
- print(f"Description: {data_description}")
568
- print(f"Operation: {operation}")
569
-
570
- # Example operations:
571
- if "calculate" in "{operation}".lower():
572
- print("Performing calculation...")
573
- elif "analyze" in "{operation}".lower():
574
- print("Performing analysis...")
575
- elif "extract" in "{operation}".lower():
576
- print("Extracting information...")
577
-
578
- print("Task completed - use enhanced_python_execution for specific calculations")
579
- """
580
-
581
- return enhanced_python_execution(code_template)
582
-
583
- except Exception as e:
584
- error_msg = f"Data processing error: {e}"
585
- logging.error(error_msg)
586
- return error_msg
587
-
588
- # --- Model and Agent Setup ---
589
 
 
 
 
590
  try:
591
- # Use a more capable model for better performance
592
- model = InferenceClientModel(
593
- model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo", # Upgraded model
594
- token=api_keys['together'],
595
- provider="together"
596
- )
597
- logging.info("βœ… Model loaded successfully")
598
- except Exception as e:
599
- logging.error(f"Failed to load primary model, falling back: {e}")
600
- try:
601
- # Fallback model
602
- model = InferenceClientModel(
603
- model_id="Qwen/Qwen2.5-7B-Instruct",
604
- token=api_keys['together'],
605
- provider="together"
606
- )
607
- logging.info("βœ… Fallback model loaded successfully")
608
- except Exception as e2:
609
- logging.error(f"Failed to load fallback model: {e2}")
610
- raise
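The primary/fallback pair collapses naturally into a loop; `load_model` is a hypothetical name, while the model IDs, token, and provider come from the block above:

```python
def load_model(api_key: str) -> InferenceClientModel:
    candidates = ("meta-llama/Llama-3.1-70B-Instruct-Turbo", "Qwen/Qwen2.5-7B-Instruct")
    for model_id in candidates:
        try:
            model = InferenceClientModel(model_id=model_id, token=api_key, provider="together")
            logging.info(f"✅ Model {model_id} loaded successfully")
            return model
        except Exception as e:
            logging.error(f"Failed to load {model_id}: {e}")
    raise RuntimeError("No inference model could be loaded")
```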
611
-
612
- # Configure Google Search tool
613
- google_search_tool = None
614
- if api_keys['serpapi']:
615
- try:
616
- google_search_tool = GoogleSearchTool(
617
- provider='serpapi',
618
- serpapi_api_key=api_keys['serpapi']
619
- )
620
- logging.info("βœ… Google Search tool configured")
621
- except Exception as e:
622
- logging.warning(f"Failed to configure Google Search tool: {e}")
623
-
624
- # Prepare tools list
625
- tools_list = [
626
- enhanced_wikipedia_search,
627
- advanced_web_query,
628
- enhanced_youtube_query,
629
- enhanced_python_execution,
630
- data_processing_tool,
631
- ]
632
-
633
- if google_search_tool:
634
- tools_list.insert(0, google_search_tool)
635
-
636
- # Specialized worker agent with comprehensive toolset
637
- worker_agent = ToolCallingAgent(
638
- tools=tools_list,
639
- model=model,
640
- max_steps=8, # Increased for complex tasks
641
- name="gaia_specialist",
642
- description="Advanced specialist agent for GAIA benchmark: web research, document analysis, video processing, mathematical computation, and data analysis."
643
- )
644
-
645
- # Enhanced strategic manager agent
646
- manager_tools = []
647
- if google_search_tool:
648
- manager_tools.append(google_search_tool)
649
-
650
- manager = CodeAgent(
651
- model=model,
652
- managed_agents=[worker_agent],
653
- tools=manager_tools,
654
- instructions="""You are a general AI assistant designed for the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions that require deep reasoning and analysis.
655
-
656
- **CRITICAL: ANSWER FORMAT REQUIREMENT**
657
- You MUST finish your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
658
-
659
- YOUR FINAL ANSWER formatting rules:
660
- - For NUMBERS: No commas, no units (like $ or %), no additional text
661
- Example: "FINAL ANSWER: 42" NOT "FINAL ANSWER: 42 dollars" or "FINAL ANSWER: $42"
662
- - For STRINGS: No articles (a, an, the), no abbreviations, write digits in plain text
663
- Example: "FINAL ANSWER: New York City" NOT "FINAL ANSWER: NYC" or "FINAL ANSWER: The Big Apple"
664
- - For LISTS: Comma-separated, apply above rules to each element
665
- Example: "FINAL ANSWER: Paris, London, Berlin" or "FINAL ANSWER: 1.5, 2.3, 4.7"
666
-
667
- **STRATEGIC APPROACH:**
668
-
669
- 1. **ANALYZE THE QUESTION**: Determine what type of answer is expected (number, string, or list)
670
-
671
- 2. **DECOMPOSE THE PROBLEM**: Break complex questions into sub-problems:
672
- - Identify required information sources
673
- - Plan tool usage sequence
674
- - Consider verification steps
675
-
676
- 3. **TOOL SELECTION**:
677
- - Use GoogleSearchTool for current information and general web queries
678
- - Delegate to gaia_specialist for complex multi-tool analysis:
679
- * advanced_web_query: Deep webpage content analysis
680
- * enhanced_youtube_query: Video transcript analysis
681
- * enhanced_python_execution: Mathematical calculations and data processing
682
- * enhanced_wikipedia_search: Encyclopedic knowledge
683
- * data_processing_tool: Structured data analysis
684
-
685
- 4. **VERIFICATION**: Cross-check critical information and validate calculations
686
-
687
- **DELEGATION EXAMPLES**:
688
-
689
- Simple queries:
690
- ```python
691
- # Direct search for current information
692
- result = search_tool.run("population Tokyo 2024")
693
- # Extract and format the answer properly
694
- ```
695
-
696
- Complex analysis:
697
- ```python
698
- # Delegate comprehensive tasks to specialist
699
- answer = gaia_specialist.run('''
700
- Find the founding year of the company mentioned in this video: [URL],
701
- calculate years from founding to 2024,
702
- then identify a major historical event from that founding year.
703
- Format the final answer according to GAIA requirements.
704
- ''')
705
- ```
706
-
707
- **RESPONSE STRUCTURE**:
708
- 1. Show your reasoning and steps
709
- 2. Use tools to gather information
710
- 3. Verify your findings
711
- 4. Format the final answer correctly
712
- 5. End with "FINAL ANSWER: [answer]"
713
-
714
- **EXAMPLES OF PROPER FORMATTING**:
715
- - Question asks for a year: "FINAL ANSWER: 1991"
716
- - Question asks for a city: "FINAL ANSWER: San Francisco"
717
- - Question asks for a percentage: "FINAL ANSWER: 25" (not "25%" unless specified)
718
- - Question asks for a list of countries: "FINAL ANSWER: France, Germany, Italy"
719
- - Question asks for a calculation result: "FINAL ANSWER: 456"
720
-
721
- Remember: Be methodical, verify your information, and always end with the properly formatted FINAL ANSWER."""
722
- )
723
-
724
- logging.info("🎯 Enhanced GAIA agent initialized successfully!")
725
-
726
- # Return wrapped agent that ensures GAIA format compliance
727
- return create_gaia_agent_wrapper(manager)
728
 
729
- # --- Main Execution Block for Local Testing ---
730
 
731
- def main():
732
- """Test the agent with sample GAIA-style questions."""
733
- configure_logging()
734
- logging.info("πŸ§ͺ Starting local testing...")
735
-
736
- try:
737
- agent = initialize_agent()
738
- if not agent:
739
- logging.error("Agent initialization failed")
740
- return
741
 
742
- # More challenging test questions similar to GAIA
743
- test_questions = [
744
- "What is 15! / (12! * 3!) ?",
745
- "In what year was the Python programming language first released?",
746
- "What is the square root of 2,025?",
747
- ]
748
 
749
- for i, question in enumerate(test_questions, 1):
750
- logging.info(f"\n{'='*60}")
751
- logging.info(f"πŸ” Test Question {i}: {question}")
752
- logging.info('='*60)
753
-
754
- start_time = time.time()
755
- try:
756
- # The agent wrapper now handles GAIA format compliance
757
- response = agent(question)
758
- elapsed_time = time.time() - start_time
759
-
760
- logging.info(f"βœ… Final Answer: {response}")
761
- logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
762
-
763
- except Exception as e:
764
- logging.error(f"❌ Error processing question {i}: {e}")
765
-
766
- time.sleep(2) # Prevent rate limiting
767
-
768
- logging.info(f"\n{'='*60}")
769
- logging.info("🏁 Testing completed!")
770
- logging.info('='*60)
771
 
772
- except Exception as e:
773
- logging.critical(f"πŸ’₯ Critical error during testing: {e}", exc_info=True)
774
 
775
- if __name__ == "__main__":
776
- main(), answer:
777
- expected_type = "number" if ',' not in answer or answer.count(',') < 2 else "list"
778
- elif ',' in answer and len(answer.split(',')) > 1:
779
- expected_type = "list"
780
- else:
781
- expected_type = "string"
782
-
783
- if expected_type == "number":
784
- # Remove commas and units for numbers
785
- answer = re.sub(r'[,$%]', '', answer)
786
- answer = re.sub(r'\s+', '', answer)
787
- # Keep only number and decimal point
788
- number_match = re.search(r'[\d.-]+', answer)
789
- if number_match:
790
- return number_match.group(0)
791
-
792
- elif expected_type == "string":
793
- # Remove articles and normalize
794
- answer = re.sub(r'\b(a|an|the)\s+', '', answer, flags=re.IGNORECASE)
795
- answer = re.sub(r'\s+', ' ', answer).strip()
796
- # Expand common abbreviations
797
- abbreviations = {
798
- 'NYC': 'New York City',
799
- 'LA': 'Los Angeles',
800
- 'SF': 'San Francisco',
801
- 'US': 'United States',
802
- 'UK': 'United Kingdom',
803
- 'EU': 'European Union'
804
- }
805
- for abbr, full in abbreviations.items():
806
- if answer.upper() == abbr:
807
- answer = full
808
- break
809
-
810
- elif expected_type == "list":
811
- # Process each element in the list
812
- elements = [elem.strip() for elem in answer.split(',')]
813
- normalized_elements = []
814
- for elem in elements:
815
- if re.match(r'^[\d.-]+', elem):
816
 
817
  def initialize_agent():
818
- """
819
- Initializes an enhanced multi-disciplinary agent optimized for GAIA benchmark questions.
820
- """
821
  configure_logging()
822
  logging.info("πŸš€ Starting GAIA agent initialization...")
823
 
824
  try:
825
  api_keys = load_api_keys()
826
- except Exception as e:
827
- logging.error(f"Failed to load API keys: {e}")
828
  return None
829
-
830
- # --- Enhanced Caching Layer for LlamaIndex ---
831
- @lru_cache(maxsize=64) # Increased cache size
832
- @retry(max_retries=3)
833
- def get_webpage_index(url: str) -> VectorStoreIndex:
834
- logging.info(f"πŸ“„ Indexing webpage: {url}")
835
- try:
836
- loader_cls = download_loader("BeautifulSoupWebReader")
837
- loader = loader_cls()
838
- docs = loader.load_data(urls=[url])
839
- if not docs:
840
- raise ValueError(f"No content could be extracted from {url}")
841
-
842
- # Filter out very short documents
843
- valid_docs = [doc for doc in docs if len(doc.text.strip()) > 50]
844
- if not valid_docs:
845
- raise ValueError(f"No substantial content found in {url}")
846
-
847
- return VectorStoreIndex.from_documents(valid_docs)
848
- except Exception as e:
849
- logging.error(f"Error indexing webpage {url}: {e}")
850
- raise
851
 
852
- @lru_cache(maxsize=32)
853
- @retry(max_retries=3)
854
- def get_youtube_index(video_id: str) -> VectorStoreIndex:
855
- logging.info(f"πŸŽ₯ Indexing YouTube video: {video_id}")
856
- try:
857
- # Try to get English transcript first
858
- try:
859
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
860
- except (TranscriptsDisabled, NoTranscriptFound):
861
- # Try auto-generated or any available transcript
862
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
863
- try:
864
- transcript = transcript_list.find_transcript(['en']).fetch()
865
- except:
866
- # Get any available transcript
867
- available_transcripts = list(transcript_list)
868
- if not available_transcripts:
869
- raise YouTubeTranscriptApiError(f"No transcripts available for video {video_id}")
870
- transcript = available_transcripts[0].fetch()
871
-
872
- if not transcript:
873
- raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
874
-
875
- # Combine transcript with timestamps for better context
876
- text_segments = []
877
- for entry in transcript:
878
- timestamp = int(entry.get('start', 0))
879
- text = entry.get('text', '').strip()
880
- if text:
881
- text_segments.append(f"[{timestamp}s] {text}")
882
-
883
- full_text = ' '.join(text_segments)
884
- if not full_text.strip():
885
- raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
886
-
887
- doc = Document(
888
- text=full_text,
889
- doc_id=f"youtube_{video_id}",
890
- metadata={"source": f"https://youtube.com/watch?v={video_id}"}
891
- )
892
- return VectorStoreIndex.from_documents([doc])
893
-
894
- except Exception as e:
895
- logging.error(f"Error indexing YouTube video {video_id}: {e}")
896
- raise
897
-
898
- # --- Enhanced Tool Definitions ---
899
 
900
- @tool
901
- def advanced_web_query(url: str, query: str) -> str:
902
- """
903
- Extract specific information from a webpage using advanced querying.
904
- Handles various content types and provides detailed responses.
905
- Args:
906
- url: The webpage URL to analyze
907
- query: Specific question to ask about the content
908
- """
909
- try:
910
- if not url.startswith(('http://', 'https://')):
911
- url = 'https://' + url
912
-
913
- logging.info(f"πŸ” Querying webpage: {url} with query: {query}")
914
- index = get_webpage_index(url)
915
- query_engine = index.as_query_engine(
916
- similarity_top_k=8, # Increased for better coverage
917
- response_mode="tree_summarize",
918
- verbose=True
919
- )
920
-
921
- response = query_engine.query(query)
922
- result = clean_text_output(str(response))
923
-
924
- # If the response seems incomplete, try a broader query
925
- if len(result) < 50 and "not found" not in result.lower():
926
- broader_query = f"Information about {query.split()[-1] if query.split() else query}"
927
- broader_response = query_engine.query(broader_query)
928
- broader_result = clean_text_output(str(broader_response))
929
- if len(broader_result) > len(result):
930
- result = broader_result
931
-
932
- return result
933
-
934
- except Exception as e:
935
- error_msg = f"Error querying webpage {url}: {e}"
936
- logging.error(error_msg)
937
- return error_msg
938
-
939
- @tool
940
- def enhanced_youtube_query(video_url_or_id: str, query: str) -> str:
941
- """
942
- Extract information from YouTube video transcripts with enhanced processing.
943
- Handles timestamps and provides contextual responses.
944
- Args:
945
- video_url_or_id: YouTube URL or video ID
946
- query: Specific question about the video content
947
- """
948
- try:
949
- video_id = extract_video_id(video_url_or_id)
950
- if not video_id:
951
- return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
952
-
953
- logging.info(f"🎬 Querying YouTube video: {video_id} with query: {query}")
954
- index = get_youtube_index(video_id)
955
- query_engine = index.as_query_engine(
956
- similarity_top_k=6,
957
- response_mode="tree_summarize",
958
- verbose=True
959
- )
960
-
961
- response = query_engine.query(query)
962
- result = clean_text_output(str(response))
963
-
964
- return result
965
-
966
- except YouTubeTranscriptApiError as e:
967
- error_msg = f"YouTube transcript error for {video_url_or_id}: {e}"
968
- logging.error(error_msg)
969
- return error_msg
970
- except Exception as e:
971
- error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
972
- logging.error(error_msg)
973
- return error_msg
974
 
975
  @tool
976
  def enhanced_python_execution(code: str) -> str:
977
- """
978
- Execute Python code with enhanced capabilities and error handling.
979
- Includes mathematical, data processing, and web scraping capabilities.
980
- Args:
981
- code: Python code to execute
982
- """
983
- # Expanded safe globals with more libraries
984
- safe_globals = {}
985
- try:
986
- # Basic Python modules
987
- import math, datetime, json, re, collections, itertools, random
988
- from fractions import Fraction
989
- from decimal import Decimal
990
- import statistics
991
-
992
- safe_globals.update({
993
- 'math': math, 'datetime': datetime, 'json': json, 're': re,
994
- 'collections': collections, 'itertools': itertools, 'random': random,
995
- 'Fraction': Fraction, 'Decimal': Decimal, 'statistics': statistics
996
- })
997
-
998
- # Scientific computing
999
- try:
1000
- import numpy as np
1001
- safe_globals['np'] = np
1002
- safe_globals['numpy'] = np
1003
- except ImportError:
1004
- logging.warning("NumPy not available")
1005
-
1006
- try:
1007
- import pandas as pd
1008
- safe_globals['pd'] = pd
1009
- safe_globals['pandas'] = pd
1010
- except ImportError:
1011
- logging.warning("Pandas not available")
1012
-
1013
- # Web requests for data fetching
1014
- try:
1015
- import requests
1016
- safe_globals['requests'] = requests
1017
- except ImportError:
1018
- logging.warning("Requests not available")
1019
-
1020
- except ImportError as e:
1021
- logging.warning(f"Some modules not available: {e}")
1022
-
1023
- # Capture both stdout and stderr
1024
  stdout_capture = io.StringIO()
1025
- stderr_capture = io.StringIO()
1026
-
1027
  try:
1028
- logging.info(f"🐍 Executing Python code: {code[:100]}...")
1029
-
1030
- with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
1031
- # Use exec with restricted builtins for safety
1032
- restricted_builtins = {
1033
- 'abs': abs, 'all': all, 'any': any, 'bin': bin, 'bool': bool,
1034
- 'chr': chr, 'dict': dict, 'dir': dir, 'divmod': divmod,
1035
- 'enumerate': enumerate, 'filter': filter, 'float': float,
1036
- 'format': format, 'hex': hex, 'int': int, 'len': len,
1037
- 'list': list, 'map': map, 'max': max, 'min': min, 'oct': oct,
1038
- 'ord': ord, 'pow': pow, 'print': print, 'range': range,
1039
- 'repr': repr, 'reversed': reversed, 'round': round,
1040
- 'set': set, 'sorted': sorted, 'str': str, 'sum': sum,
1041
- 'tuple': tuple, 'type': type, 'zip': zip,
1042
- }
1043
-
1044
  exec(code, {"__builtins__": restricted_builtins}, safe_globals)
1045
 
1046
- stdout_result = stdout_capture.getvalue()
1047
- stderr_result = stderr_capture.getvalue()
1048
-
1049
- # Combine outputs
1050
- result_parts = []
1051
- if stdout_result.strip():
1052
- result_parts.append(stdout_result.strip())
1053
- if stderr_result.strip():
1054
- result_parts.append(f"Warnings/Errors: {stderr_result.strip()}")
1055
-
1056
- if result_parts:
1057
- return '\n'.join(result_parts)
1058
- else:
1059
- return "Code executed successfully (no output)"
1060
-
1061
  except Exception as e:
1062
  error_msg = f"Code execution error: {e}"
1063
- stderr_result = stderr_capture.getvalue()
1064
- if stderr_result.strip():
1065
- error_msg += f"\nAdditional details: {stderr_result.strip()}"
1066
- logging.error(error_msg)
1067
- return error_msg
1068
-
1069
- @tool
1070
- def enhanced_wikipedia_search(query: str, detailed: bool = True) -> str:
1071
- """
1072
- Search Wikipedia with enhanced content extraction and error handling.
1073
- Args:
1074
- query: Search term
1075
- detailed: Whether to return detailed information or just summary
1076
- """
1077
- try:
1078
- import wikipedia
1079
- wikipedia.set_lang("en")
1080
- wikipedia.set_rate_limiting(True)
1081
-
1082
- logging.info(f"πŸ“š Searching Wikipedia for: {query}")
1083
-
1084
- # Handle disambiguation and search suggestions
1085
- try:
1086
- page = wikipedia.page(query, auto_suggest=True)
1087
- except wikipedia.DisambiguationError as e:
1088
- # Take the first option from disambiguation
1089
- if e.options:
1090
- page = wikipedia.page(e.options[0])
1091
- else:
1092
- return f"Wikipedia disambiguation error for '{query}': {e}"
1093
- except wikipedia.PageError:
1094
- # Try searching if direct page lookup fails
1095
- search_results = wikipedia.search(query, results=3)
1096
- if search_results:
1097
- page = wikipedia.page(search_results[0])
1098
- else:
1099
- return f"No Wikipedia results found for '{query}'"
1100
-
1101
- if detailed:
1102
- # Get more comprehensive content
1103
- content_sections = []
1104
- content_sections.append(f"**{page.title}**")
1105
- content_sections.append(f"Summary: {page.summary}")
1106
-
1107
- # Add first few sections if available
1108
- if hasattr(page, 'content') and page.content:
1109
- sections = page.content.split('\n\n')[:3] # First 3 paragraphs
1110
- for section in sections:
1111
- if section.strip() and len(section) > 50:
1112
- content_sections.append(section.strip())
1113
-
1114
- content_sections.append(f"Source: {page.url}")
1115
- return '\n\n'.join(content_sections)
1116
- else:
1117
- return f"**{page.title}**\n\n{page.summary}\n\nSource: {page.url}"
1118
-
1119
- except ImportError:
1120
- return "Wikipedia library not installed. Cannot perform search."
1121
- except Exception as e:
1122
- error_msg = f"Wikipedia search error for '{query}': {e}"
1123
- logging.error(error_msg)
1124
- return error_msg
1125
-
1126
- @tool
1127
- def data_processing_tool(data_description: str, operation: str) -> str:
1128
- """
1129
- Process and analyze data based on descriptions and operations.
1130
- Useful for mathematical calculations, data analysis, and structured data processing.
1131
- Args:
1132
- data_description: Description of the data or data source
1133
- operation: The operation to perform (calculate, analyze, extract, etc.)
1134
- """
1135
- try:
1136
- logging.info(f"πŸ“Š Processing data: {data_description} | Operation: {operation}")
1137
-
1138
- # This tool is designed to work with the Python execution tool
1139
- # for complex data processing tasks
1140
- code_template = f"""
1141
- # Data processing task: {operation}
1142
- # Data description: {data_description}
1143
-
1144
- # Add your specific data processing logic here
1145
- # This is a template - specific implementation depends on the data and operation
1146
-
1147
- print("Data processing task initiated")
1148
- print(f"Description: {data_description}")
1149
- print(f"Operation: {operation}")
1150
-
1151
- # Example operations:
1152
- if "calculate" in "{operation}".lower():
1153
- print("Performing calculation...")
1154
- elif "analyze" in "{operation}".lower():
1155
- print("Performing analysis...")
1156
- elif "extract" in "{operation}".lower():
1157
- print("Extracting information...")
1158
-
1159
- print("Task completed - use enhanced_python_execution for specific calculations")
1160
- """
1161
-
1162
- return enhanced_python_execution(code_template)
1163
-
1164
- except Exception as e:
1165
- error_msg = f"Data processing error: {e}"
1166
  logging.error(error_msg)
1167
  return error_msg
1168
 
1169
  # --- Model and Agent Setup ---
1170
 
1171
  try:
1172
- # Use a more capable model for better performance
1173
  model = InferenceClientModel(
1174
- model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo", # Upgraded model
1175
  token=api_keys['together'],
1176
  provider="together"
1177
  )
1178
- logging.info("βœ… Model loaded successfully")
1179
- except Exception as e:
1180
- logging.error(f"Failed to load primary model, falling back: {e}")
1181
- try:
1182
- # Fallback model
1183
- model = InferenceClientModel(
1184
- model_id="Qwen/Qwen2.5-7B-Instruct",
1185
- token=api_keys['together'],
1186
- provider="together"
1187
- )
1188
- logging.info("βœ… Fallback model loaded successfully")
1189
- except Exception as e2:
1190
- logging.error(f"Failed to load fallback model: {e2}")
1191
- raise
1192
-
1193
- # Configure Google Search tool
1194
- google_search_tool = None
1195
- if api_keys['serpapi']:
1196
- try:
1197
- google_search_tool = GoogleSearchTool(
1198
- provider='serpapi',
1199
- serpapi_api_key=api_keys['serpapi']
1200
- )
1201
- logging.info("βœ… Google Search tool configured")
1202
- except Exception as e:
1203
- logging.warning(f"Failed to configure Google Search tool: {e}")
1204
-
1205
- # Prepare tools list
1206
- tools_list = [
1207
- enhanced_wikipedia_search,
1208
- advanced_web_query,
1209
- enhanced_youtube_query,
1210
- enhanced_python_execution,
1211
- data_processing_tool,
1212
- ]
1213
-
1214
- if google_search_tool:
1215
- tools_list.insert(0, google_search_tool)
1216
-
1217
- # Specialized worker agent with comprehensive toolset
1218
- worker_agent = ToolCallingAgent(
1219
- tools=tools_list,
1220
- model=model,
1221
- max_steps=8, # Increased for complex tasks
1222
- name="gaia_specialist",
1223
- description="Advanced specialist agent for GAIA benchmark: web research, document analysis, video processing, mathematical computation, and data analysis."
1224
- )
1225
-
1226
- # Enhanced strategic manager agent
1227
- manager_tools = []
1228
- if google_search_tool:
1229
- manager_tools.append(google_search_tool)
1230
-
1231
- manager = CodeAgent(
1232
- model=model,
1233
- managed_agents=[worker_agent],
1234
- tools=manager_tools,
1235
- instructions="""You are a general AI assistant designed for the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions that require deep reasoning and analysis.
1236
-
1237
- **CRITICAL: ANSWER FORMAT REQUIREMENT**
1238
- You MUST finish your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
1239
-
1240
- YOUR FINAL ANSWER formatting rules:
1241
- - For NUMBERS: No commas, no units (like $ or %), no additional text
1242
- Example: "FINAL ANSWER: 42" NOT "FINAL ANSWER: 42 dollars" or "FINAL ANSWER: $42"
1243
- - For STRINGS: No articles (a, an, the), no abbreviations, write digits in plain text
1244
- Example: "FINAL ANSWER: New York City" NOT "FINAL ANSWER: NYC" or "FINAL ANSWER: The Big Apple"
1245
- - For LISTS: Comma-separated, apply above rules to each element
1246
- Example: "FINAL ANSWER: Paris, London, Berlin" or "FINAL ANSWER: 1.5, 2.3, 4.7"
1247
-
1248
- **STRATEGIC APPROACH:**
1249
-
1250
- 1. **ANALYZE THE QUESTION**: Determine what type of answer is expected (number, string, or list)
1251
-
1252
- 2. **DECOMPOSE THE PROBLEM**: Break complex questions into sub-problems:
1253
- - Identify required information sources
1254
- - Plan tool usage sequence
1255
- - Consider verification steps
1256
-
1257
- 3. **TOOL SELECTION**:
1258
- - Use GoogleSearchTool for current information and general web queries
1259
- - Delegate to gaia_specialist for complex multi-tool analysis:
1260
- * advanced_web_query: Deep webpage content analysis
1261
- * enhanced_youtube_query: Video transcript analysis
1262
- * enhanced_python_execution: Mathematical calculations and data processing
1263
- * enhanced_wikipedia_search: Encyclopedic knowledge
1264
- * data_processing_tool: Structured data analysis
1265
-
1266
- 4. **VERIFICATION**: Cross-check critical information and validate calculations
1267
-
1268
- **DELEGATION EXAMPLES**:
1269
-
1270
- Simple queries:
1271
- ```python
1272
- # Direct search for current information
1273
- result = search_tool.run("population Tokyo 2024")
1274
- # Extract and format the answer properly
1275
- ```
1276
-
1277
- Complex analysis:
1278
- ```python
1279
- # Delegate comprehensive tasks to specialist
1280
- answer = gaia_specialist.run('''
1281
- Find the founding year of the company mentioned in this video: [URL],
1282
- calculate years from founding to 2024,
1283
- then identify a major historical event from that founding year.
1284
- Format the final answer according to GAIA requirements.
1285
- ''')
1286
- ```
1287
-
1288
- **RESPONSE STRUCTURE**:
1289
- 1. Show your reasoning and steps
1290
- 2. Use tools to gather information
1291
- 3. Verify your findings
1292
- 4. Format the final answer correctly
1293
- 5. End with "FINAL ANSWER: [answer]"
1294
-
1295
- **EXAMPLES OF PROPER FORMATTING**:
1296
- - Question asks for a year: "FINAL ANSWER: 1991"
1297
- - Question asks for a city: "FINAL ANSWER: San Francisco"
1298
- - Question asks for a percentage: "FINAL ANSWER: 25" (not "25%" unless specified)
1299
- - Question asks for a list of countries: "FINAL ANSWER: France, Germany, Italy"
1300
- - Question asks for a calculation result: "FINAL ANSWER: 456"
1301
-
1302
- Remember: Be methodical, verify your information, and always end with the properly formatted FINAL ANSWER."""
1303
- )
1304
-
1305
- logging.info("🎯 Enhanced GAIA agent initialized successfully!")
1306
- return manager
1307
-
1308
- # --- Main Execution Block for Local Testing ---
1309
-
1310
- def main():
1311
- """Test the agent with sample GAIA-style questions."""
1312
- configure_logging()
1313
- logging.info("πŸ§ͺ Starting local testing...")
1314
-
1315
- try:
1316
- agent = initialize_agent()
1317
- if not agent:
1318
- logging.error("Agent initialization failed")
1319
- return
1320
-
1321
- # More challenging test questions similar to GAIA
1322
- test_questions = [
1323
- "What is 15! / (12! * 3!) ?",
1324
- "In what year was the Python programming language first released?",
1325
- "What is the square root of 2,025?",
1326
- ]
1327
-
1328
- for i, question in enumerate(test_questions, 1):
1329
- logging.info(f"\n{'='*60}")
1330
- logging.info(f"πŸ” Test Question {i}: {question}")
1331
- logging.info('='*60)
1332
-
1333
- start_time = time.time()
1334
- try:
1335
- response = agent.run(question)
1336
- elapsed_time = time.time() - start_time
1337
-
1338
- logging.info(f"βœ… Agent Answer: {response}")
1339
- logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
1340
-
1341
- except Exception as e:
1342
- logging.error(f"❌ Error processing question {i}: {e}")
1343
-
1344
- time.sleep(2) # Prevent rate limiting
1345
-
1346
- logging.info(f"\n{'='*60}")
1347
- logging.info("🏁 Testing completed!")
1348
- logging.info('='*60)
1349
-
1350
  except Exception as e:
1351
- logging.critical(f"πŸ’₯ Critical error during testing: {e}", exc_info=True)
1352
-
1353
- if __name__ == "__main__":
1354
- main()
- if re.match(r'^[\d.-]+', elem):
1355
- # It's a number
1356
- normalized_elements.append(normalize_answer_format(elem, "number"))
1357
- else:
1358
- # It's a string
1359
- normalized_elements.append(normalize_answer_format(elem, "string"))
1360
- return ', '.join(normalized_elements)
1361
-
1362
- return answer
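Expected behaviour of the removed normaliser, given the branches visible above; the corrupted file loses the exact number-detection line, so the first example is an assumption:

```python
print(normalize_answer_format("3.14"))                   # -> 3.14  (number branch, assumed detection)
print(normalize_answer_format("NYC"))                    # -> New York City  (abbreviation table)
print(normalize_answer_format("The United States"))      # -> United States  (articles stripped)
print(normalize_answer_format("Paris, London, Berlin"))  # -> Paris, London, Berlin  (per-element rules)
```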
1363
-
1364
- # --- Enhanced Agent Wrapper ---
1365
-
1366
- def create_gaia_agent_wrapper(agent):
1367
- """
1368
- Create a wrapper around the agent that ensures GAIA format compliance.
1369
- """
1370
- def gaia_agent_run(question: str) -> str:
1371
- """
1372
- Run the agent with GAIA format compliance.
1373
- Returns only the final answer in the correct format.
1374
- """
1375
- try:
1376
- # Add explicit formatting instruction to the question
1377
- formatted_question = f"""
1378
- {question}
1379
-
1380
- Remember to end your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
1381
-
1382
- Follow GAIA formatting rules:
1383
- - Numbers: No commas, no units (unless specified)
1384
- - Strings: No articles, no abbreviations, digits in plain text
1385
- - Lists: Comma-separated following above rules for each element
1386
- """
1387
-
1388
- # Get the full response from the agent
1389
- full_response = agent.run(formatted_question)
1390
-
1391
- # Extract and normalize the final answer
1392
- final_answer = extract_final_answer(full_response)
1393
- normalized_answer = normalize_answer_format(final_answer)
1394
-
1395
- logging.info(f"🎯 Question: {question}")
1396
- logging.info(f"πŸ€– Full response: {full_response}")
1397
- logging.info(f"βœ… Final answer: {normalized_answer}")
1398
-
1399
- return normalized_answer
1400
-
1401
- except Exception as e:
1402
- error_msg = f"Agent execution error: {e}"
1403
- logging.error(error_msg)
1404
- return f"ERROR: {e}"
1405
-
1406
- return gaia_agent_run
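The wrapper returns a bare callable rather than an agent object, which is why the first deleted test harness calls `agent(question)` instead of `agent.run(question)`:

```python
agent = initialize_agent()   # returns gaia_agent_run, a plain function
if agent:
    answer = agent("In what year was the Python programming language first released?")
    print(answer)            # e.g. "1991" after FINAL ANSWER extraction and normalization
```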
1407
-
1408
- def initialize_agent():
1409
- """
1410
- Initializes an enhanced multi-disciplinary agent optimized for GAIA benchmark questions.
1411
- """
1412
- configure_logging()
1413
- logging.info("πŸš€ Starting GAIA agent initialization...")
1414
-
1415
- try:
1416
- api_keys = load_api_keys()
1417
- except Exception as e:
1418
- logging.error(f"Failed to load API keys: {e}")
1419
- return None
1420
-
1421
- # --- Enhanced Caching Layer for LlamaIndex ---
1422
- @lru_cache(maxsize=64) # Increased cache size
1423
- @retry(max_retries=3)
1424
- def get_webpage_index(url: str) -> VectorStoreIndex:
1425
- logging.info(f"πŸ“„ Indexing webpage: {url}")
1426
- try:
1427
- loader_cls = download_loader("BeautifulSoupWebReader")
1428
- loader = loader_cls()
1429
- docs = loader.load_data(urls=[url])
1430
- if not docs:
1431
- raise ValueError(f"No content could be extracted from {url}")
1432
-
1433
- # Filter out very short documents
1434
- valid_docs = [doc for doc in docs if len(doc.text.strip()) > 50]
1435
- if not valid_docs:
1436
- raise ValueError(f"No substantial content found in {url}")
1437
-
1438
- return VectorStoreIndex.from_documents(valid_docs)
1439
- except Exception as e:
1440
- logging.error(f"Error indexing webpage {url}: {e}")
1441
- raise
1442
-
1443
- @lru_cache(maxsize=32)
1444
- @retry(max_retries=3)
1445
- def get_youtube_index(video_id: str) -> VectorStoreIndex:
1446
- logging.info(f"πŸŽ₯ Indexing YouTube video: {video_id}")
1447
- try:
1448
- # Try to get English transcript first
1449
- try:
1450
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
1451
- except (TranscriptsDisabled, NoTranscriptFound):
1452
- # Try auto-generated or any available transcript
1453
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
1454
- try:
1455
- transcript = transcript_list.find_transcript(['en']).fetch()
1456
- except:
1457
- # Get any available transcript
1458
- available_transcripts = list(transcript_list)
1459
- if not available_transcripts:
1460
- raise YouTubeTranscriptApiError(f"No transcripts available for video {video_id}")
1461
- transcript = available_transcripts[0].fetch()
1462
-
1463
- if not transcript:
1464
- raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
1465
-
1466
- # Combine transcript with timestamps for better context
1467
- text_segments = []
1468
- for entry in transcript:
1469
- timestamp = int(entry.get('start', 0))
1470
- text = entry.get('text', '').strip()
1471
- if text:
1472
- text_segments.append(f"[{timestamp}s] {text}")
1473
-
1474
- full_text = ' '.join(text_segments)
1475
- if not full_text.strip():
1476
- raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
1477
-
1478
- doc = Document(
1479
- text=full_text,
1480
- doc_id=f"youtube_{video_id}",
1481
- metadata={"source": f"https://youtube.com/watch?v={video_id}"}
1482
- )
1483
- return VectorStoreIndex.from_documents([doc])
1484
-
1485
- except Exception as e:
1486
- logging.error(f"Error indexing YouTube video {video_id}: {e}")
1487
- raise
1488
-
1489
- # --- Enhanced Tool Definitions ---
1490
-
1491
- @tool
1492
- def advanced_web_query(url: str, query: str) -> str:
1493
- """
1494
- Extract specific information from a webpage using advanced querying.
1495
- Handles various content types and provides detailed responses.
1496
- Args:
1497
- url: The webpage URL to analyze
1498
- query: Specific question to ask about the content
1499
- """
1500
- try:
1501
- if not url.startswith(('http://', 'https://')):
1502
- url = 'https://' + url
1503
-
1504
- logging.info(f"πŸ” Querying webpage: {url} with query: {query}")
1505
- index = get_webpage_index(url)
1506
- query_engine = index.as_query_engine(
1507
- similarity_top_k=8, # Increased for better coverage
1508
- response_mode="tree_summarize",
1509
- verbose=True
1510
- )
1511
-
1512
- response = query_engine.query(query)
1513
- result = clean_text_output(str(response))
1514
-
1515
- # If the response seems incomplete, try a broader query
1516
- if len(result) < 50 and "not found" not in result.lower():
1517
- broader_query = f"Information about {query.split()[-1] if query.split() else query}"
1518
- broader_response = query_engine.query(broader_query)
1519
- broader_result = clean_text_output(str(broader_response))
1520
- if len(broader_result) > len(result):
1521
- result = broader_result
1522
-
1523
- return result
1524
-
1525
- except Exception as e:
1526
- error_msg = f"Error querying webpage {url}: {e}"
1527
- logging.error(error_msg)
1528
- return error_msg
1529
-
1530
- @tool
1531
- def enhanced_youtube_query(video_url_or_id: str, query: str) -> str:
1532
- """
1533
- Extract information from YouTube video transcripts with enhanced processing.
1534
- Handles timestamps and provides contextual responses.
1535
- Args:
1536
- video_url_or_id: YouTube URL or video ID
1537
- query: Specific question about the video content
1538
- """
1539
- try:
1540
- video_id = extract_video_id(video_url_or_id)
1541
- if not video_id:
1542
- return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
1543
-
1544
- logging.info(f"🎬 Querying YouTube video: {video_id} with query: {query}")
1545
- index = get_youtube_index(video_id)
1546
- query_engine = index.as_query_engine(
1547
- similarity_top_k=6,
1548
- response_mode="tree_summarize",
1549
- verbose=True
1550
- )
1551
-
1552
- response = query_engine.query(query)
1553
- result = clean_text_output(str(response))
1554
-
1555
- return result
1556
-
1557
- except YouTubeTranscriptApiError as e:
1558
- error_msg = f"YouTube transcript error for {video_url_or_id}: {e}"
1559
- logging.error(error_msg)
1560
- return error_msg
1561
- except Exception as e:
1562
- error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
1563
- logging.error(error_msg)
1564
- return error_msg
1565
-
1566
- @tool
1567
- def enhanced_python_execution(code: str) -> str:
1568
- """
1569
- Execute Python code with enhanced capabilities and error handling.
1570
- Includes mathematical, data processing, and web scraping capabilities.
1571
- Args:
1572
- code: Python code to execute
1573
- """
1574
- # Expanded safe globals with more libraries
1575
- safe_globals = {}
1576
- try:
1577
- # Basic Python modules
1578
- import math, datetime, json, re, collections, itertools, random
1579
- from fractions import Fraction
1580
- from decimal import Decimal
1581
- import statistics
1582
-
1583
- safe_globals.update({
1584
- 'math': math, 'datetime': datetime, 'json': json, 're': re,
1585
- 'collections': collections, 'itertools': itertools, 'random': random,
1586
- 'Fraction': Fraction, 'Decimal': Decimal, 'statistics': statistics
1587
- })
1588
-
1589
- # Scientific computing
1590
- try:
1591
- import numpy as np
1592
- safe_globals['np'] = np
1593
- safe_globals['numpy'] = np
1594
- except ImportError:
1595
- logging.warning("NumPy not available")
1596
-
1597
- try:
1598
- import pandas as pd
1599
- safe_globals['pd'] = pd
1600
- safe_globals['pandas'] = pd
1601
- except ImportError:
1602
- logging.warning("Pandas not available")
1603
-
1604
- # Web requests for data fetching
1605
- try:
1606
- import requests
1607
- safe_globals['requests'] = requests
1608
- except ImportError:
1609
- logging.warning("Requests not available")
1610
-
1611
- except ImportError as e:
1612
- logging.warning(f"Some modules not available: {e}")
1613
-
1614
- # Capture both stdout and stderr
1615
- stdout_capture = io.StringIO()
1616
- stderr_capture = io.StringIO()
1617
-
1618
- try:
1619
- logging.info(f"🐍 Executing Python code: {code[:100]}...")
1620
-
1621
- with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
1622
- # Use exec with restricted builtins for safety
1623
- restricted_builtins = {
1624
- 'abs': abs, 'all': all, 'any': any, 'bin': bin, 'bool': bool,
1625
- 'chr': chr, 'dict': dict, 'dir': dir, 'divmod': divmod,
1626
- 'enumerate': enumerate, 'filter': filter, 'float': float,
1627
- 'format': format, 'hex': hex, 'int': int, 'len': len,
1628
- 'list': list, 'map': map, 'max': max, 'min': min, 'oct': oct,
1629
- 'ord': ord, 'pow': pow, 'print': print, 'range': range,
1630
- 'repr': repr, 'reversed': reversed, 'round': round,
1631
- 'set': set, 'sorted': sorted, 'str': str, 'sum': sum,
1632
- 'tuple': tuple, 'type': type, 'zip': zip,
1633
- }
1634
-
1635
- exec(code, {"__builtins__": restricted_builtins}, safe_globals)
1636
-
1637
- stdout_result = stdout_capture.getvalue()
1638
- stderr_result = stderr_capture.getvalue()
1639
-
1640
- # Combine outputs
1641
- result_parts = []
1642
- if stdout_result.strip():
1643
- result_parts.append(stdout_result.strip())
1644
- if stderr_result.strip():
1645
- result_parts.append(f"Warnings/Errors: {stderr_result.strip()}")
1646
-
1647
- if result_parts:
1648
- return '\n'.join(result_parts)
1649
- else:
1650
- return "Code executed successfully (no output)"
1651
-
1652
- except Exception as e:
1653
- error_msg = f"Code execution error: {e}"
1654
- stderr_result = stderr_capture.getvalue()
1655
- if stderr_result.strip():
1656
- error_msg += f"\nAdditional details: {stderr_result.strip()}"
1657
- logging.error(error_msg)
1658
- return error_msg
1659
-
1660
- @tool
1661
- def enhanced_wikipedia_search(query: str, detailed: bool = True) -> str:
1662
- """
1663
- Search Wikipedia with enhanced content extraction and error handling.
1664
- Args:
1665
- query: Search term
1666
- detailed: Whether to return detailed information or just summary
1667
- """
1668
- try:
1669
- import wikipedia
1670
- wikipedia.set_lang("en")
1671
- wikipedia.set_rate_limiting(True)
1672
-
1673
- logging.info(f"πŸ“š Searching Wikipedia for: {query}")
1674
-
1675
- # Handle disambiguation and search suggestions
1676
- try:
1677
- page = wikipedia.page(query, auto_suggest=True)
1678
- except wikipedia.DisambiguationError as e:
1679
- # Take the first option from disambiguation
1680
- if e.options:
1681
- page = wikipedia.page(e.options[0])
1682
- else:
1683
- return f"Wikipedia disambiguation error for '{query}': {e}"
1684
- except wikipedia.PageError:
1685
- # Try searching if direct page lookup fails
1686
- search_results = wikipedia.search(query, results=3)
1687
- if search_results:
1688
- page = wikipedia.page(search_results[0])
1689
- else:
1690
- return f"No Wikipedia results found for '{query}'"
1691
-
1692
- if detailed:
1693
- # Get more comprehensive content
1694
- content_sections = []
1695
- content_sections.append(f"**{page.title}**")
1696
- content_sections.append(f"Summary: {page.summary}")
1697
-
1698
- # Add first few sections if available
1699
- if hasattr(page, 'content') and page.content:
1700
- sections = page.content.split('\n\n')[:3] # First 3 paragraphs
1701
- for section in sections:
1702
- if section.strip() and len(section) > 50:
1703
- content_sections.append(section.strip())
1704
-
1705
- content_sections.append(f"Source: {page.url}")
1706
- return '\n\n'.join(content_sections)
1707
- else:
1708
- return f"**{page.title}**\n\n{page.summary}\n\nSource: {page.url}"
1709
-
1710
- except ImportError:
1711
- return "Wikipedia library not installed. Cannot perform search."
1712
- except Exception as e:
1713
- error_msg = f"Wikipedia search error for '{query}': {e}"
1714
- logging.error(error_msg)
1715
- return error_msg
1716
-
1717
- @tool
1718
- def data_processing_tool(data_description: str, operation: str) -> str:
1719
- """
1720
- Process and analyze data based on descriptions and operations.
1721
- Useful for mathematical calculations, data analysis, and structured data processing.
1722
- Args:
1723
- data_description: Description of the data or data source
1724
- operation: The operation to perform (calculate, analyze, extract, etc.)
1725
- """
1726
- try:
1727
- logging.info(f"πŸ“Š Processing data: {data_description} | Operation: {operation}")
1728
-
1729
- # This tool is designed to work with the Python execution tool
1730
- # for complex data processing tasks
1731
- code_template = f"""
1732
- # Data processing task: {operation}
1733
- # Data description: {data_description}
1734
-
1735
- # Add your specific data processing logic here
1736
- # This is a template - specific implementation depends on the data and operation
1737
-
1738
- print("Data processing task initiated")
1739
- print(f"Description: {data_description}")
1740
- print(f"Operation: {operation}")
1741
-
1742
- # Example operations:
1743
- if "calculate" in "{operation}".lower():
1744
- print("Performing calculation...")
1745
- elif "analyze" in "{operation}".lower():
1746
- print("Performing analysis...")
1747
- elif "extract" in "{operation}".lower():
1748
- print("Extracting information...")
1749
-
1750
- print("Task completed - use enhanced_python_execution for specific calculations")
1751
- """
1752
-
1753
- return enhanced_python_execution(code_template)
1754
-
1755
- except Exception as e:
1756
- error_msg = f"Data processing error: {e}"
1757
- logging.error(error_msg)
1758
- return error_msg
1759
-
1760
- # --- Model and Agent Setup ---
1761
-
1762
- try:
1763
- # Use a more capable model for better performance
1764
  model = InferenceClientModel(
1765
- model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo", # Upgraded model
1766
  token=api_keys['together'],
1767
  provider="together"
1768
  )
1769
- logging.info("βœ… Model loaded successfully")
1770
- except Exception as e:
1771
- logging.error(f"Failed to load primary model, falling back: {e}")
1772
- try:
1773
- # Fallback model
1774
- model = InferenceClientModel(
1775
- model_id="Qwen/Qwen2.5-7B-Instruct",
1776
- token=api_keys['together'],
1777
- provider="together"
1778
- )
1779
- logging.info("βœ… Fallback model loaded successfully")
1780
- except Exception as e2:
1781
- logging.error(f"Failed to load fallback model: {e2}")
1782
- raise
1783
-
1784
- # Configure Google Search tool
1785
- google_search_tool = None
1786
- if api_keys['serpapi']:
1787
- try:
1788
- google_search_tool = GoogleSearchTool(
1789
- provider='serpapi',
1790
- serpapi_api_key=api_keys['serpapi']
1791
- )
1792
- logging.info("βœ… Google Search tool configured")
1793
- except Exception as e:
1794
- logging.warning(f"Failed to configure Google Search tool: {e}")
1795
-
1796
- # Prepare tools list
1797
- tools_list = [
1798
- enhanced_wikipedia_search,
1799
- advanced_web_query,
1800
- enhanced_youtube_query,
1801
- enhanced_python_execution,
1802
- data_processing_tool,
1803
- ]
1804
-
1805
- if google_search_tool:
1806
- tools_list.insert(0, google_search_tool)
1807
 
1808
- # Specialized worker agent with comprehensive toolset
1809
- worker_agent = ToolCallingAgent(
1810
- tools=tools_list,
1811
- model=model,
1812
- max_steps=8, # Increased for complex tasks
1813
- name="gaia_specialist",
1814
- description="Advanced specialist agent for GAIA benchmark: web research, document analysis, video processing, mathematical computation, and data analysis."
1815
- )
1816
 
1817
- # Enhanced strategic manager agent
1818
- manager_tools = []
1819
- if google_search_tool:
1820
- manager_tools.append(google_search_tool)
1821
 
1822
  manager = CodeAgent(
1823
  model=model,
1824
- managed_agents=[worker_agent],
1825
- tools=manager_tools,
1826
- instructions="""You are a general AI assistant designed for the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions that require deep reasoning and analysis.
1827
-
1828
- **CRITICAL: ANSWER FORMAT REQUIREMENT**
1829
- You MUST finish your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
1830
-
1831
- YOUR FINAL ANSWER formatting rules:
1832
- - For NUMBERS: No commas, no units (like $ or %), no additional text
1833
- Example: "FINAL ANSWER: 42" NOT "FINAL ANSWER: 42 dollars" or "FINAL ANSWER: $42"
1834
- - For STRINGS: No articles (a, an, the), no abbreviations, write digits in plain text
1835
- Example: "FINAL ANSWER: New York City" NOT "FINAL ANSWER: NYC" or "FINAL ANSWER: The Big Apple"
1836
- - For LISTS: Comma-separated, apply above rules to each element
1837
- Example: "FINAL ANSWER: Paris, London, Berlin" or "FINAL ANSWER: 1.5, 2.3, 4.7"
1838
-
1839
- **STRATEGIC APPROACH:**
1840
-
1841
- 1. **ANALYZE THE QUESTION**: Determine what type of answer is expected (number, string, or list)
1842
-
1843
- 2. **DECOMPOSE THE PROBLEM**: Break complex questions into sub-problems:
1844
- - Identify required information sources
1845
- - Plan tool usage sequence
1846
- - Consider verification steps
1847
-
1848
- 3. **TOOL SELECTION**:
1849
- - Use GoogleSearchTool for current information and general web queries
1850
- - Delegate to gaia_specialist for complex multi-tool analysis:
1851
- * advanced_web_query: Deep webpage content analysis
1852
- * enhanced_youtube_query: Video transcript analysis
1853
- * enhanced_python_execution: Mathematical calculations and data processing
1854
- * enhanced_wikipedia_search: Encyclopedic knowledge
1855
- * data_processing_tool: Structured data analysis
1856
-
1857
- 4. **VERIFICATION**: Cross-check critical information and validate calculations
1858
-
1859
- **DELEGATION EXAMPLES**:
1860
-
1861
- Simple queries:
1862
- ```python
1863
- # Direct search for current information
1864
- result = search_tool.run("population Tokyo 2024")
1865
- # Extract and format the answer properly
1866
- ```
1867
-
1868
- Complex analysis:
1869
- ```python
1870
- # Delegate comprehensive tasks to specialist
1871
- answer = gaia_specialist.run('''
1872
- Find the founding year of the company mentioned in this video: [URL],
1873
- calculate years from founding to 2024,
1874
- then identify a major historical event from that founding year.
1875
- Format the final answer according to GAIA requirements.
1876
- ''')
1877
- ```
1878
-
1879
- **RESPONSE STRUCTURE**:
1880
- 1. Show your reasoning and steps
1881
- 2. Use tools to gather information
1882
- 3. Verify your findings
1883
- 4. Format the final answer correctly
1884
- 5. End with "FINAL ANSWER: [answer]"
1885
-
1886
- **EXAMPLES OF PROPER FORMATTING**:
1887
- - Question asks for a year: "FINAL ANSWER: 1991"
1888
- - Question asks for a city: "FINAL ANSWER: San Francisco"
1889
- - Question asks for a percentage: "FINAL ANSWER: 25" (not "25%" unless specified)
1890
- - Question asks for a list of countries: "FINAL ANSWER: France, Germany, Italy"
1891
- - Question asks for a calculation result: "FINAL ANSWER: 456"
1892
 
1893
- Remember: Be methodical, verify your information, and always end with the properly formatted FINAL ANSWER."""
 
 
 
 
1894
  )
1895
 
1896
- logging.info("🎯 Enhanced GAIA agent initialized successfully!")
1897
- return manager
 
 
1898
 
1899
  # --- Main Execution Block for Local Testing ---
1900
 
1901
  def main():
1902
- """Test the agent with sample GAIA-style questions."""
1903
  configure_logging()
1904
- logging.info("πŸ§ͺ Starting local testing...")
1905
 
1906
- try:
1907
- agent = initialize_agent()
1908
- if not agent:
1909
- logging.error("Agent initialization failed")
1910
- return
1911
 
1912
- # More challenging test questions similar to GAIA
1913
- test_questions = [
1914
- "What is 15! / (12! * 3!) ?",
1915
- "In what year was the Python programming language first released?",
1916
- "What is the square root of 2,025?",
1917
- ]
 
 
 
1918
 
1919
- for i, question in enumerate(test_questions, 1):
1920
- logging.info(f"\n{'='*60}")
1921
- logging.info(f"πŸ” Test Question {i}: {question}")
1922
- logging.info('='*60)
1923
-
1924
- start_time = time.time()
1925
- try:
1926
- response = agent.run(question)
1927
- elapsed_time = time.time() - start_time
1928
-
1929
- logging.info(f"βœ… Agent Answer: {response}")
1930
- logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
1931
-
1932
- except Exception as e:
1933
- logging.error(f"❌ Error processing question {i}: {e}")
1934
-
1935
- time.sleep(2) # Prevent rate limiting
1936
-
1937
- logging.info(f"\n{'='*60}")
1938
- logging.info("🏁 Testing completed!")
1939
- logging.info('='*60)
1940
 
1941
- except Exception as e:
1942
- logging.critical(f"πŸ’₯ Critical error during testing: {e}", exc_info=True)
 
 
 
 
1943
 
1944
  if __name__ == "__main__":
1945
  main()
 
  import json
  from functools import lru_cache, wraps
  from typing import Optional, Dict, Any, List
 
  from dotenv import load_dotenv
  from requests.exceptions import RequestException
  import serpapi
  from llama_index.core import VectorStoreIndex, download_loader
  from llama_index.core.schema import Document
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
  # --- Correctly import the specific tools from smolagents ---
  from smolagents import (
 
      datefmt="%Y-%m-%d %H:%M:%S"
  )
 
+ def load_api_keys() -> Dict[str, Optional[str]]:
+     """Loads API keys from environment variables."""
      load_dotenv()
      keys = {
          'together': os.getenv('TOGETHER_API_KEY'),
          'serpapi': os.getenv('SERPAPI_API_KEY'),
      }
      for key_name, key_value in keys.items():
          if key_value:
              logging.info(f"βœ… {key_name.upper()} API key loaded")
          else:
              logging.warning(f"⚠️ {key_name.upper()} API key not found")
 
      if not keys['together']:
+         raise ValueError("TOGETHER_API_KEY is required but not found.")
      return keys
 
  # --- Custom Exceptions ---
+ class SerpApiClientException(Exception): pass
+ class YouTubeTranscriptApiError(Exception): pass
 
  # --- Enhanced Decorators ---
 
  def retry(max_retries=3, initial_delay=1, backoff=2):
+     """A robust retry decorator with exponential backoff."""
      def decorator(func):
          @wraps(func)
          def wrapper(*args, **kwargs):
              delay = initial_delay
+             retryable_exceptions = (RequestException, SerpApiClientException, YouTubeTranscriptApiError, TranscriptsDisabled, NoTranscriptFound)
              for attempt in range(1, max_retries + 1):
                  try:
                      return func(*args, **kwargs)
                  except retryable_exceptions as e:
                      if attempt == max_retries:
                          logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
+                         # BUG FIX: Return a descriptive error string instead of raising, which could crash the agent.
+                         return f"Tool Error: {func.__name__} failed after {max_retries} attempts. Details: {e}"
                      logging.warning(f"Attempt {attempt} for {func.__name__} failed: {e}. Retrying in {delay} seconds...")
                      time.sleep(delay)
                      delay *= backoff
                  except Exception as e:
                      logging.error(f"{func.__name__} failed with a non-retryable error: {e}")
+                     return f"Tool Error: A non-retryable error occurred in {func.__name__}: {e}"
          return wrapper
      return decorator
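Because `retry` is a decorator factory (it takes `max_retries`, `initial_delay`, `backoff`), it must always be applied with parentheses, e.g. `@retry()`; a bare `@retry` would pass the decorated function in as `max_retries`. A minimal sketch of the intended behavior, assuming the `retry` defined above (the `flaky_fetch` helper and its failure pattern are illustrative only):

```python
from requests.exceptions import RequestException

calls = {"n": 0}

@retry(max_retries=3, initial_delay=0)
def flaky_fetch() -> str:
    # Fails twice with a retryable error, then succeeds on the third attempt.
    calls["n"] += 1
    if calls["n"] < 3:
        raise RequestException("transient network hiccup")
    return "payload"

print(flaky_fetch())  # "payload" - succeeds on the third attempt
```

A call that fails on every attempt comes back as a `"Tool Error: ..."` string instead of an exception, so the agent loop sees an ordinary tool result it can reason about rather than a crash.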
 
  # --- Enhanced Helper Functions ---
 
  def extract_video_id(url_or_id: str) -> Optional[str]:
+     """Extracts YouTube video ID from various URL formats."""
+     if not url_or_id: return None
      url_or_id = url_or_id.strip()
      if re.match(r'^[a-zA-Z0-9_-]{11}$', url_or_id):
          return url_or_id
      patterns = [
+         r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/|youtube-nocookie\.com/embed/)([a-zA-Z0-9_-]{11})'
      ]
      for pattern in patterns:
          match = re.search(pattern, url_or_id)
          if match:
              return match.group(1)
      return None
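The helper accepts either a bare 11-character ID or any of the URL shapes named in the pattern. A quick sanity check of the expected behavior, using a well-known sample ID:

```python
assert extract_video_id("dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("not a video reference") is None
```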
 
  def clean_text_output(text: str) -> str:
+     """Cleans and normalizes text output."""
+     if not text: return ""
+     text = re.sub(r'\s+', ' ', text).strip()
      return text
 
+ # --- Answer Formatting and Extraction (CRITICAL FOR GAIA) ---
 
  def extract_final_answer(response: str) -> str:
+     """Extracts the final answer from the agent's full response string."""
+     if not response: return ""
+     match = re.search(r'FINAL\s+ANSWER\s*:\s*(.*)', response, re.IGNORECASE | re.DOTALL)
      if match:
+         return match.group(1).strip()
 
+     # Fallback if the pattern is missing
      lines = response.strip().split('\n')
+     return lines[-1].strip()
 
+ def normalize_answer_format(answer: str) -> str:
+     """Normalizes the extracted answer to meet strict GAIA formatting requirements."""
+     if not answer: return ""
 
+     answer = answer.strip().rstrip('.')
 
+     # Auto-detect type
+     is_list = ',' in answer and len(answer.split(',')) > 1
+     is_numeric = False
      try:
+         # BUG FIX: strip separators and currency/percent symbols before the float test,
+         # otherwise values like "$1500" never take the numeric branch and keep their symbols.
+         float(re.sub(r'[,$%]', '', answer))
+         is_numeric = not is_list  # A list of numbers is a list, not a single number
+     except ValueError:
+         is_numeric = False
+ 
+     if is_numeric:
+         return re.sub(r'[,$%]', '', answer).strip()
+     elif is_list:
+         elements = [elem.strip() for elem in answer.split(',')]
+         # Recursively normalize each element of the list
+         normalized_elements = [normalize_answer_format(elem) for elem in elements]
+         return ', '.join(normalized_elements)
+     else:  # Is a string
+         # Expand common abbreviations
+         abbreviations = {'NYC': 'New York City', 'LA': 'Los Angeles', 'SF': 'San Francisco'}
+         return abbreviations.get(answer.upper(), answer)
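Together these helpers form the submission pipeline: `extract_final_answer` pulls out whatever follows the `FINAL ANSWER:` marker, and `normalize_answer_format` applies the GAIA surface rules. A sketch of the expected round trips (sample strings are illustrative):

```python
raw = "Reasoning steps go here.\nFINAL ANSWER: $1500"

assert extract_final_answer(raw) == "$1500"
assert normalize_answer_format("$1500") == "1500"            # currency symbol stripped from a numeric answer
assert normalize_answer_format("Paris, London, Berlin") == "Paris, London, Berlin"  # lists pass through element-wise
assert normalize_answer_format("NYC") == "New York City"     # known abbreviation expanded
```

One caveat of the auto-detection: a number written with thousands separators, such as "1,500", is classified as a list because of the comma, so answers should be produced without separators in the first place.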
 
+ # --- Agent Wrapper for GAIA Compliance ---
 
+ def create_gaia_agent_wrapper(agent: CodeAgent):
+     """
+     Creates a callable wrapper around the agent to enforce GAIA answer formatting.
+     This is a key component for ensuring the final output is compliant.
+     """
+     def gaia_compliant_agent(question: str) -> str:
+         logging.info(f"Received question for GAIA compliant agent: '{question}'")
+         full_response = agent.run(question)
+         logging.info(f"Agent raw response:\n---\n{full_response}\n---")
 
+         final_answer = extract_final_answer(full_response)
+         normalized_answer = normalize_answer_format(final_answer)
 
+         logging.info(f"Extracted final answer: '{final_answer}'")
+         logging.info(f"Normalized answer for submission: '{normalized_answer}'")
 
+         return normalized_answer
+     return gaia_compliant_agent
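The wrapper turns the agent into a plain `question -> answer` callable. A sketch with a stub standing in for a real `CodeAgent` (the `StubAgent` class is illustrative only; the type hint is not enforced at runtime):

```python
class StubAgent:
    # Stands in for a smolagents CodeAgent: run() returns the full reasoning trace.
    def run(self, question: str) -> str:
        return "Reasoning...\nFINAL ANSWER: 42"

submit = create_gaia_agent_wrapper(StubAgent())
print(submit("What is 6 * 7?"))  # -> "42", extracted and normalized
```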
 
+ # --- Main Agent Initialization ---
 
  def initialize_agent():
+     """Initializes the enhanced multi-disciplinary agent for the GAIA benchmark."""
      configure_logging()
      logging.info("πŸš€ Starting GAIA agent initialization...")
 
      try:
          api_keys = load_api_keys()
+     except ValueError as e:
+         logging.error(f"FATAL: {e}")
          return None
 
+     # --- Tool Definitions ---
 
+     @lru_cache(maxsize=64)
+     @retry()  # BUG FIX: retry is a decorator factory, so it must be called with parentheses
+     def get_webpage_index(url: str) -> VectorStoreIndex:
+         logging.info(f"πŸ“„ Indexing webpage: {url}")
+         loader = download_loader("BeautifulSoupWebReader")()
+         docs = loader.load_data(urls=[url])
+         if not docs or not any(len(doc.text.strip()) > 50 for doc in docs):
+             raise ValueError(f"No substantial content found in {url}")
+         return VectorStoreIndex.from_documents(docs)
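Decorator order matters here: `lru_cache` wraps the retried function, so a URL that has already been indexed is served from cache and the retry loop never re-runs. One side effect worth knowing is that when `retry` converts a terminal failure into a `"Tool Error: ..."` string, that string is cached for the URL as well. A minimal sketch of the ordering effect (names illustrative):

```python
from functools import lru_cache

hits = {"n": 0}

@lru_cache(maxsize=8)
@retry(max_retries=2, initial_delay=0)
def fetch_index(url: str) -> str:
    hits["n"] += 1
    return f"indexed {url}"

fetch_index("https://example.com")
fetch_index("https://example.com")
print(hits["n"])  # 1 - the second call is a cache hit, so no fetch and no retries
```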
 
      @tool
      def enhanced_python_execution(code: str) -> str:
+         """Executes Python code in a restricted environment and returns the output."""
+         logging.info(f"🐍 Executing Python code: {code[:200]}...")
          stdout_capture = io.StringIO()
          try:
+             # ENHANCEMENT: Restrict built-ins for better security
+             safe_globals = {
+                 "requests": __import__("requests"), "pd": __import__("pandas"), "np": __import__("numpy"),
+                 "datetime": __import__("datetime"), "math": __import__("math"), "re": __import__("re"),
+                 "json": __import__("json"), "collections": __import__("collections")
+             }
+             restricted_builtins = {
+                 'print': print, 'len': len, 'range': range, 'str': str, 'int': int, 'float': float,
+                 'list': list, 'dict': dict, 'set': set, 'tuple': tuple, 'max': max, 'min': min, 'sum': sum,
+                 'sorted': sorted, 'round': round
+             }
+             with contextlib.redirect_stdout(stdout_capture):
                  exec(code, {"__builtins__": restricted_builtins}, safe_globals)
 
+             result = stdout_capture.getvalue().strip()
+             return result if result else "Code executed successfully with no output."
          except Exception as e:
              error_msg = f"Code execution error: {e}"
              logging.error(error_msg)
              return error_msg
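The two-mapping `exec` call is the sandboxing trick: the globals dict exposes only the whitelisted built-ins, while the pre-imported modules (`math`, `pd`, `np`, and so on) arrive through the locals mapping. Submitted snippets can therefore compute and `print`, but `import`, `open`, and `eval` are simply absent. A sketch of a typical call, mirroring the first local test question below (calling the tool as a plain function assumes the smolagents `@tool` wrapper stays directly callable):

```python
snippet = "print(math.factorial(15) // (math.factorial(12) * math.factorial(3)))"
print(enhanced_python_execution(snippet))  # -> "455", captured from the snippet's stdout
```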
 
      # --- Model and Agent Setup ---
 
      try:
          model = InferenceClientModel(
+             model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo",
              token=api_keys['together'],
              provider="together"
          )
+         logging.info("βœ… Primary model (Llama 3.1 70B) loaded successfully")
      except Exception as e:
+         logging.warning(f"⚠️ Failed to load primary model, falling back. Error: {e}")
          model = InferenceClientModel(
+             model_id="Qwen/Qwen2.5-7B-Instruct",
              token=api_keys['together'],
              provider="together"
          )
+         logging.info("βœ… Fallback model (Qwen 2.5 7B) loaded successfully")
 
+     google_search_tool = GoogleSearchTool(provider='serpapi', serpapi_api_key=api_keys['serpapi']) if api_keys['serpapi'] else None
 
+     tools_list = [tool for tool in [google_search_tool, enhanced_python_execution] if tool]
 
      manager = CodeAgent(
          model=model,
+         tools=tools_list,
+         instructions="""You are a master AI assistant for the GAIA benchmark. Your goal is to provide a single, precise, and final answer.
+ 
+ **STRATEGY:**
+ 1. **Analyze**: Break down the user's question into steps.
+ 2. **Execute**: Use the provided tools (`GoogleSearchTool`, `enhanced_python_execution`) to find the information or perform calculations.
+ 3. **Synthesize**: Combine the results of your tool use to form a final answer.
+ 4. **Format**: Present your final answer clearly at the end of your response, prefixed with `FINAL ANSWER:`.
+ 
+ **CRITICAL INSTRUCTION:** You MUST end your entire response with the line `FINAL ANSWER: [Your Final Answer]`. The text that follows this prefix is what will be submitted. Adhere to strict formatting: no extra words, no currency symbols, no commas in numbers.
+ - For "What is 2*21?": `FINAL ANSWER: 42`
+ - For "Capital of France?": `FINAL ANSWER: Paris`
+ - For "What are the first three even numbers?": `FINAL ANSWER: 2, 4, 6`
+ """
      )
 
+     logging.info("🎯 GAIA agent initialized successfully!")
+ 
+     # BUG FIX: Return the wrapped, compliant agent instead of the raw manager.
+     return create_gaia_agent_wrapper(manager)
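Returning the wrapper changes the call contract: callers no longer invoke `manager.run(question)` and parse the trace themselves, they call the returned function directly. A hypothetical call site:

```python
agent = initialize_agent()
if agent:
    answer = agent("What is the square root of 2025?")
    print(answer)  # expected "45", already extracted and normalized
```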
 
  # --- Main Execution Block for Local Testing ---
 
  def main():
+     """Tests the agent with sample GAIA-style questions."""
      configure_logging()
+     logging.info("πŸ§ͺ Starting local agent testing...")
 
+     agent = initialize_agent()
+     if not agent:
+         logging.critical("πŸ’₯ Agent initialization failed. Exiting.")
+         return
 
+     test_questions = [
+         "What is 15! / (12! * 3!)?",
+         "In what year was the Python programming language first released?",
+         "What is the square root of 2025?",
+     ]
+ 
+     for i, question in enumerate(test_questions, 1):
+         logging.info(f"\n{'='*60}\nπŸ” Test Question {i}: {question}\n{'='*60}")
+         start_time = time.time()
 
+         # BUG FIX: Call the agent wrapper directly, not agent.run()
+         final_answer = agent(question)
 
+         elapsed_time = time.time() - start_time
+         logging.info(f"βœ… Submitted Answer: {final_answer}")
+         logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
+         time.sleep(1)
+ 
+     logging.info(f"\n{'='*60}\n🏁 Testing complete!\n{'='*60}")
 
  if __name__ == "__main__":
      main()
requirements.txt CHANGED
@@ -17,3 +17,4 @@ requests
  llama-index
  beautifulsoup4
  lxml
+ youtube-transcript-api