jesusgj committed on
Commit
19b4b62
·
1 Parent(s): 871e0bb

Modified files

Files changed (2)
  1. agent.py +141 -1786
  2. requirements.txt +1 -0
agent.py CHANGED
@@ -8,17 +8,13 @@ import re
8
  import json
9
  from functools import lru_cache, wraps
10
  from typing import Optional, Dict, Any, List
11
- from datetime import datetime, timedelta
12
 
13
  from dotenv import load_dotenv
14
  from requests.exceptions import RequestException
15
- import requests
16
  import serpapi
17
  from llama_index.core import VectorStoreIndex, download_loader
18
  from llama_index.core.schema import Document
19
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
20
- import pandas as pd
21
- import numpy as np
22
 
23
  # --- Correctly import the specific tools from smolagents ---
24
  from smolagents import (
@@ -39,16 +35,13 @@ def configure_logging():
39
  datefmt="%Y-%m-%d %H:%M:%S"
40
  )
41
 
42
- def load_api_keys():
43
- """Loads API keys from environment variables with fallback."""
44
  load_dotenv()
45
  keys = {
46
  'together': os.getenv('TOGETHER_API_KEY'),
47
  'serpapi': os.getenv('SERPAPI_API_KEY'),
48
- 'hf_token': os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACE_HUB_TOKEN'),
49
  }
50
-
51
- # Log which keys are available (without revealing the actual keys)
52
  for key_name, key_value in keys.items():
53
  if key_value:
54
  logging.info(f"βœ… {key_name.upper()} API key loaded")
@@ -56,1890 +49,252 @@ def load_api_keys():
56
  logging.warning(f"⚠️ {key_name.upper()} API key not found")
57
 
58
  if not keys['together']:
59
- raise ValueError("TOGETHER_API_KEY is required but not found in environment variables.")
60
-
61
  return keys
62
 
63
  # --- Custom Exceptions ---
64
- class SerpApiClientException(Exception):
65
- pass
66
-
67
- class YouTubeTranscriptApiError(Exception):
68
- pass
69
-
70
- class DataProcessingError(Exception):
71
- pass
72
 
73
  # --- Enhanced Decorators ---
74
 
75
  def retry(max_retries=3, initial_delay=1, backoff=2):
76
- """A robust retry decorator with exponential backoff and better error handling."""
77
  def decorator(func):
78
  @wraps(func)
79
  def wrapper(*args, **kwargs):
80
  delay = initial_delay
81
- retryable_exceptions = (
82
- RequestException,
83
- SerpApiClientException,
84
- YouTubeTranscriptApiError,
85
- TranscriptsDisabled,
86
- NoTranscriptFound,
87
- ConnectionError,
88
- TimeoutError
89
- )
90
- last_exception = None
91
-
92
  for attempt in range(1, max_retries + 1):
93
  try:
94
  return func(*args, **kwargs)
95
  except retryable_exceptions as e:
96
- last_exception = e
97
  if attempt == max_retries:
98
  logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
99
- break
 
100
  logging.warning(f"Attempt {attempt} for {func.__name__} failed: {e}. Retrying in {delay} seconds...")
101
  time.sleep(delay)
102
  delay *= backoff
103
  except Exception as e:
104
  logging.error(f"{func.__name__} failed with a non-retryable error: {e}")
105
- raise
106
-
107
- # If we get here, all retries failed
108
- return f"Error after {max_retries} attempts: {last_exception}"
109
  return wrapper
110
  return decorator
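Note that the kept `except retryable_exceptions as e:` line still references the `retryable_exceptions` tuple this hunk removes, so the new file must redefine it. A minimal self-contained sketch of the trimmed decorator, assuming only request/connection failures remain retryable:

```python
import logging
import time
from functools import wraps

from requests.exceptions import RequestException


def retry(max_retries=3, initial_delay=1, backoff=2):
    """Retry with exponential backoff; non-retryable errors propagate immediately."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            delay = initial_delay
            # Assumed retryable set; the removed version also listed SerpApi/YouTube errors.
            retryable_exceptions = (RequestException, ConnectionError, TimeoutError)
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except retryable_exceptions as e:
                    if attempt == max_retries:
                        logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
                        raise
                    logging.warning(f"Attempt {attempt} failed: {e}. Retrying in {delay}s...")
                    time.sleep(delay)
                    delay *= backoff
        return wrapper
    return decorator
```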
111
 
112
  # --- Enhanced Helper Functions ---
113
 
114
  def extract_video_id(url_or_id: str) -> Optional[str]:
115
- """Extract YouTube video ID from various URL formats with better validation."""
116
- if not url_or_id:
117
- return None
118
-
119
- # Clean the input
120
  url_or_id = url_or_id.strip()
121
-
122
- # Check if it's already a video ID
123
  if re.match(r'^[a-zA-Z0-9_-]{11}$', url_or_id):
124
  return url_or_id
125
-
126
- # Various YouTube URL patterns
127
  patterns = [
128
- r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})',
129
- r'youtube\.com/.*[?&]v=([a-zA-Z0-9_-]{11})',
130
- r'youtube-nocookie\.com/embed/([a-zA-Z0-9_-]{11})',
131
  ]
132
-
133
  for pattern in patterns:
134
  match = re.search(pattern, url_or_id)
135
  if match:
136
  return match.group(1)
137
-
138
  return None
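Behaviour sketch for the extractor, exercising the URL shapes handled by the removed patterns (the 11-character ID is a hypothetical example):

```python
assert extract_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=42s") == "dQw4w9WgXcQ"
assert extract_video_id("dQw4w9WgXcQ") == "dQw4w9WgXcQ"   # bare video ID
assert extract_video_id("not a video") is None
```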
139
 
140
  def clean_text_output(text: str) -> str:
141
- """Clean and normalize text output for better processing."""
142
- if not text:
143
- return ""
144
-
145
- # Remove excessive whitespace
146
- text = re.sub(r'\s+', ' ', text.strip())
147
-
148
- # Remove common prefixes that might interfere with answer extraction
149
- prefixes_to_remove = [
150
- "Based on the search results,",
151
- "According to the information,",
152
- "The answer is:",
153
- "Result:",
154
- ]
155
-
156
- for prefix in prefixes_to_remove:
157
- if text.lower().startswith(prefix.lower()):
158
- text = text[len(prefix):].strip()
159
-
160
  return text
161
 
162
- def extract_numerical_answer(text: str) -> Optional[str]:
163
- """Extract numerical answers from text with better precision."""
164
- # Look for standalone numbers
165
- number_patterns = [
166
- r'\b(\d+\.?\d*)\b', # Decimal numbers
167
- r'\b(\d+/\d+)\b', # Fractions
168
- r'\b(\d+,\d+(?:,\d+)*)\b', # Numbers with commas
169
- ]
170
-
171
- for pattern in number_patterns:
172
- matches = re.findall(pattern, text)
173
- if matches:
174
- return matches[-1] # Return the last match (often the final answer)
175
-
176
- return None
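The removed helper deliberately returned the last match, on the assumption that the final number in a model response is the answer:

```python
print(extract_numerical_answer("Step 1 gives 12, so the result is 455"))  # -> 455
print(extract_numerical_answer("no digits here"))                         # -> None
```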
177
-
178
- # --- Answer Extraction Functions ---
179
 
180
  def extract_final_answer(response: str) -> str:
181
- """
182
- Extract the final answer from agent response following GAIA format requirements.
183
- Looks for 'FINAL ANSWER:' pattern and extracts the answer after it.
184
- """
185
- if not response:
186
- return ""
187
-
188
- # Look for FINAL ANSWER pattern (case insensitive)
189
- final_answer_pattern = re.compile(r'FINAL\s+ANSWER\s*:\s*(.+?)(?:\n|$)', re.IGNORECASE | re.DOTALL)
190
- match = final_answer_pattern.search(response)
191
-
192
  if match:
193
- answer = match.group(1).strip()
194
- # Clean up common formatting issues
195
- answer = re.sub(r'\s+', ' ', answer) # Normalize whitespace
196
- answer = answer.rstrip('.') # Remove trailing periods
197
- return answer
198
 
199
- # Fallback: if no FINAL ANSWER found, try to extract from end of response
200
  lines = response.strip().split('\n')
201
- if lines:
202
- last_line = lines[-1].strip()
203
- # Remove common prefixes
204
- for prefix in ['Answer:', 'Result:', 'The answer is:', 'Final result:']:
205
- if last_line.lower().startswith(prefix.lower()):
206
- return last_line[len(prefix):].strip()
207
- return last_line
208
-
209
- return response.strip()
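The two extraction paths being removed, illustrated:

```python
# Marker path: the regex captures everything after "FINAL ANSWER:".
print(extract_final_answer("reasoning...\nFINAL ANSWER: Paris, London, Berlin"))
# -> Paris, London, Berlin

# Fallback path: no marker, so the last line is used and its prefix stripped.
print(extract_final_answer("some work\nAnswer: 1991"))
# -> 1991
```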
210
 
211
- def normalize_answer_format(answer: str, expected_type: str = "auto") -> str:
212
- """
213
- Normalize answer format according to GAIA requirements.
214
- Args:
215
- answer: The extracted answer
216
- expected_type: "number", "string", "list", or "auto" to detect
217
- """
218
- if not answer:
219
- return answer
220
-
221
- answer = answer.strip()
222
 
223
- # Auto-detect type if not specified
224
- if expected_type == "auto":
225
- # Try to detect a list (comma-separated, at least two elements)
226
- if ',' in answer and len([x for x in answer.split(',') if x.strip()]) > 1:
227
- expected_type = "list"
228
- # Try to detect a number (integer or float, possibly negative)
229
- elif re.fullmatch(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?', answer.replace(',', '').strip()):
230
- expected_type = "number"
231
- # Otherwise, treat as string
232
- else:
233
- expected_type = "string"
234
-
235
-
236
- def initialize_agent():
237
- """
238
- Initializes an enhanced multi-disciplinary agent optimized for GAIA benchmark questions.
239
- """
240
- configure_logging()
241
- logging.info("πŸš€ Starting GAIA agent initialization...")
242
-
243
- try:
244
- api_keys = load_api_keys()
245
- except Exception as e:
246
- logging.error(f"Failed to load API keys: {e}")
247
- return None
248
-
249
- # --- Enhanced Caching Layer for LlamaIndex ---
250
- @lru_cache(maxsize=64) # Increased cache size
251
- @retry(max_retries=3)
252
- def get_webpage_index(url: str) -> VectorStoreIndex:
253
- logging.info(f"πŸ“„ Indexing webpage: {url}")
254
- try:
255
- loader_cls = download_loader("BeautifulSoupWebReader")
256
- loader = loader_cls()
257
- docs = loader.load_data(urls=[url])
258
- if not docs:
259
- raise ValueError(f"No content could be extracted from {url}")
260
-
261
- # Filter out very short documents
262
- valid_docs = [doc for doc in docs if len(doc.text.strip()) > 50]
263
- if not valid_docs:
264
- raise ValueError(f"No substantial content found in {url}")
265
-
266
- return VectorStoreIndex.from_documents(valid_docs)
267
- except Exception as e:
268
- logging.error(f"Error indexing webpage {url}: {e}")
269
- raise
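On the decorator stacking removed here: `lru_cache` wraps the retried function, so failures raise through uncached and only successfully built indexes are memoised. A toy demonstration, assuming the `retry` decorator above:

```python
from functools import lru_cache

calls = 0

@lru_cache(maxsize=64)
@retry(max_retries=3)
def fake_index(url: str) -> str:
    global calls
    calls += 1
    return f"index-for:{url}"

fake_index("https://example.com")
fake_index("https://example.com")
print(calls)  # -> 1: the second call never reaches the loader
```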
270
-
271
- @lru_cache(maxsize=32)
272
- @retry(max_retries=3)
273
- def get_youtube_index(video_id: str) -> VectorStoreIndex:
274
- logging.info(f"πŸŽ₯ Indexing YouTube video: {video_id}")
275
- try:
276
- # Try to get English transcript first
277
- try:
278
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
279
- except (TranscriptsDisabled, NoTranscriptFound):
280
- # Try auto-generated or any available transcript
281
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
282
- try:
283
- transcript = transcript_list.find_transcript(['en']).fetch()
284
- except:
285
- # Get any available transcript
286
- available_transcripts = list(transcript_list)
287
- if not available_transcripts:
288
- raise YouTubeTranscriptApiError(f"No transcripts available for video {video_id}")
289
- transcript = available_transcripts[0].fetch()
290
-
291
- if not transcript:
292
- raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
293
-
294
- # Combine transcript with timestamps for better context
295
- text_segments = []
296
- for entry in transcript:
297
- timestamp = int(entry.get('start', 0))
298
- text = entry.get('text', '').strip()
299
- if text:
300
- text_segments.append(f"[{timestamp}s] {text}")
301
-
302
- full_text = ' '.join(text_segments)
303
- if not full_text.strip():
304
- raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
305
-
306
- doc = Document(
307
- text=full_text,
308
- doc_id=f"youtube_{video_id}",
309
- metadata={"source": f"https://youtube.com/watch?v={video_id}"}
310
- )
311
- return VectorStoreIndex.from_documents([doc])
312
-
313
- except Exception as e:
314
- logging.error(f"Error indexing YouTube video {video_id}: {e}")
315
- raise
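A condensed sketch of the transcript path being removed, keeping the timestamped-segment format; it assumes the pre-1.0 `youtube_transcript_api` interface this file imports (`get_transcript` returning dicts with `start`/`text` keys):

```python
from youtube_transcript_api import YouTubeTranscriptApi

def transcript_with_timestamps(video_id: str) -> str:
    entries = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
    segments = [
        f"[{int(e.get('start', 0))}s] {e['text'].strip()}"
        for e in entries
        if e.get('text', '').strip()
    ]
    return ' '.join(segments)
```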
316
-
317
- # --- Enhanced Tool Definitions ---
318
-
319
- @tool
320
- def advanced_web_query(url: str, query: str) -> str:
321
- """
322
- Extract specific information from a webpage using advanced querying.
323
- Handles various content types and provides detailed responses.
324
- Args:
325
- url: The webpage URL to analyze
326
- query: Specific question to ask about the content
327
- """
328
- try:
329
- if not url.startswith(('http://', 'https://')):
330
- url = 'https://' + url
331
-
332
- logging.info(f"πŸ” Querying webpage: {url} with query: {query}")
333
- index = get_webpage_index(url)
334
- query_engine = index.as_query_engine(
335
- similarity_top_k=8, # Increased for better coverage
336
- response_mode="tree_summarize",
337
- verbose=True
338
- )
339
-
340
- response = query_engine.query(query)
341
- result = clean_text_output(str(response))
342
-
343
- # If the response seems incomplete, try a broader query
344
- if len(result) < 50 and "not found" not in result.lower():
345
- broader_query = f"Information about {query.split()[-1] if query.split() else query}"
346
- broader_response = query_engine.query(broader_query)
347
- broader_result = clean_text_output(str(broader_response))
348
- if len(broader_result) > len(result):
349
- result = broader_result
350
-
351
- return result
352
-
353
- except Exception as e:
354
- error_msg = f"Error querying webpage {url}: {e}"
355
- logging.error(error_msg)
356
- return error_msg
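The query path reduces to standard LlamaIndex calls already used above; a stripped-down sketch (URL hypothetical):

```python
index = get_webpage_index("https://en.wikipedia.org/wiki/Python_(programming_language)")
engine = index.as_query_engine(similarity_top_k=8, response_mode="tree_summarize")
print(clean_text_output(str(engine.query("When was Python first released?"))))
```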
357
-
358
- @tool
359
- def enhanced_youtube_query(video_url_or_id: str, query: str) -> str:
360
- """
361
- Extract information from YouTube video transcripts with enhanced processing.
362
- Handles timestamps and provides contextual responses.
363
- Args:
364
- video_url_or_id: YouTube URL or video ID
365
- query: Specific question about the video content
366
- """
367
- try:
368
- video_id = extract_video_id(video_url_or_id)
369
- if not video_id:
370
- return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
371
-
372
- logging.info(f"🎬 Querying YouTube video: {video_id} with query: {query}")
373
- index = get_youtube_index(video_id)
374
- query_engine = index.as_query_engine(
375
- similarity_top_k=6,
376
- response_mode="tree_summarize",
377
- verbose=True
378
- )
379
-
380
- response = query_engine.query(query)
381
- result = clean_text_output(str(response))
382
-
383
- return result
384
-
385
- except YouTubeTranscriptApiError as e:
386
- error_msg = f"YouTube transcript error for {video_url_or_id}: {e}"
387
- logging.error(error_msg)
388
- return error_msg
389
- except Exception as e:
390
- error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
391
- logging.error(error_msg)
392
- return error_msg
393
-
394
- @tool
395
- def enhanced_python_execution(code: str) -> str:
396
- """
397
- Execute Python code with enhanced capabilities and error handling.
398
- Includes mathematical, data processing, and web scraping capabilities.
399
- Args:
400
- code: Python code to execute
401
- """
402
- # Expanded safe globals with more libraries
403
- safe_globals = {}
404
- try:
405
- # Basic Python modules
406
- import math, datetime, json, re, collections, itertools, random
407
- from fractions import Fraction
408
- from decimal import Decimal
409
- import statistics
410
-
411
- safe_globals.update({
412
- 'math': math, 'datetime': datetime, 'json': json, 're': re,
413
- 'collections': collections, 'itertools': itertools, 'random': random,
414
- 'Fraction': Fraction, 'Decimal': Decimal, 'statistics': statistics
415
- })
416
-
417
- # Scientific computing
418
- try:
419
- import numpy as np
420
- safe_globals['np'] = np
421
- safe_globals['numpy'] = np
422
- except ImportError:
423
- logging.warning("NumPy not available")
424
-
425
- try:
426
- import pandas as pd
427
- safe_globals['pd'] = pd
428
- safe_globals['pandas'] = pd
429
- except ImportError:
430
- logging.warning("Pandas not available")
431
-
432
- # Web requests for data fetching
433
- try:
434
- import requests
435
- safe_globals['requests'] = requests
436
- except ImportError:
437
- logging.warning("Requests not available")
438
-
439
- except ImportError as e:
440
- logging.warning(f"Some modules not available: {e}")
441
-
442
- # Capture both stdout and stderr
443
- stdout_capture = io.StringIO()
444
- stderr_capture = io.StringIO()
445
-
446
- try:
447
- logging.info(f"🐍 Executing Python code: {code[:100]}...")
448
-
449
- with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
450
- # Use exec with restricted builtins for safety
451
- restricted_builtins = {
452
- 'abs': abs, 'all': all, 'any': any, 'bin': bin, 'bool': bool,
453
- 'chr': chr, 'dict': dict, 'dir': dir, 'divmod': divmod,
454
- 'enumerate': enumerate, 'filter': filter, 'float': float,
455
- 'format': format, 'hex': hex, 'int': int, 'len': len,
456
- 'list': list, 'map': map, 'max': max, 'min': min, 'oct': oct,
457
- 'ord': ord, 'pow': pow, 'print': print, 'range': range,
458
- 'repr': repr, 'reversed': reversed, 'round': round,
459
- 'set': set, 'sorted': sorted, 'str': str, 'sum': sum,
460
- 'tuple': tuple, 'type': type, 'zip': zip,
461
- }
462
-
463
- exec(code, {"__builtins__": restricted_builtins}, safe_globals)
464
-
465
- stdout_result = stdout_capture.getvalue()
466
- stderr_result = stderr_capture.getvalue()
467
-
468
- # Combine outputs
469
- result_parts = []
470
- if stdout_result.strip():
471
- result_parts.append(stdout_result.strip())
472
- if stderr_result.strip():
473
- result_parts.append(f"Warnings/Errors: {stderr_result.strip()}")
474
-
475
- if result_parts:
476
- return '\n'.join(result_parts)
477
- else:
478
- return "Code executed successfully (no output)"
479
-
480
- except Exception as e:
481
- error_msg = f"Code execution error: {e}"
482
- stderr_result = stderr_capture.getvalue()
483
- if stderr_result.strip():
484
- error_msg += f"\nAdditional details: {stderr_result.strip()}"
485
- logging.error(error_msg)
486
- return error_msg
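The sandbox pattern here, compressed: `exec` with a whitelisted `__builtins__` mapping and redirected streams. Worth noting that `exec`-level restriction is best-effort, not a hard security boundary. A minimal sketch:

```python
import contextlib
import io

def run_snippet(code: str) -> str:
    # Whitelist only the builtins the snippet may use.
    allowed = {'print': print, 'range': range, 'len': len, 'sum': sum, 'min': min, 'max': max}
    out = io.StringIO()
    with contextlib.redirect_stdout(out):
        exec(code, {"__builtins__": allowed}, {})
    return out.getvalue().strip() or "Code executed successfully (no output)"

print(run_snippet("print(sum(range(10)))"))  # -> 45
```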
487
-
488
- @tool
489
- def enhanced_wikipedia_search(query: str, detailed: bool = True) -> str:
490
- """
491
- Search Wikipedia with enhanced content extraction and error handling.
492
- Args:
493
- query: Search term
494
- detailed: Whether to return detailed information or just summary
495
- """
496
- try:
497
- import wikipedia
498
- wikipedia.set_lang("en")
499
- wikipedia.set_rate_limiting(True)
500
-
501
- logging.info(f"πŸ“š Searching Wikipedia for: {query}")
502
-
503
- # Handle disambiguation and search suggestions
504
- try:
505
- page = wikipedia.page(query, auto_suggest=True)
506
- except wikipedia.DisambiguationError as e:
507
- # Take the first option from disambiguation
508
- if e.options:
509
- page = wikipedia.page(e.options[0])
510
- else:
511
- return f"Wikipedia disambiguation error for '{query}': {e}"
512
- except wikipedia.PageError:
513
- # Try searching if direct page lookup fails
514
- search_results = wikipedia.search(query, results=3)
515
- if search_results:
516
- page = wikipedia.page(search_results[0])
517
- else:
518
- return f"No Wikipedia results found for '{query}'"
519
-
520
- if detailed:
521
- # Get more comprehensive content
522
- content_sections = []
523
- content_sections.append(f"**{page.title}**")
524
- content_sections.append(f"Summary: {page.summary}")
525
-
526
- # Add first few sections if available
527
- if hasattr(page, 'content') and page.content:
528
- sections = page.content.split('\n\n')[:3] # First 3 paragraphs
529
- for section in sections:
530
- if section.strip() and len(section) > 50:
531
- content_sections.append(section.strip())
532
-
533
- content_sections.append(f"Source: {page.url}")
534
- return '\n\n'.join(content_sections)
535
- else:
536
- return f"**{page.title}**\n\n{page.summary}\n\nSource: {page.url}"
537
-
538
- except ImportError:
539
- return "Wikipedia library not installed. Cannot perform search."
540
- except Exception as e:
541
- error_msg = f"Wikipedia search error for '{query}': {e}"
542
- logging.error(error_msg)
543
- return error_msg
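The lookup-with-fallback chain, reduced to its skeleton using the same `wikipedia` package calls as above:

```python
import wikipedia

def lookup_summary(query: str) -> str:
    wikipedia.set_lang("en")
    try:
        page = wikipedia.page(query, auto_suggest=True)
    except wikipedia.DisambiguationError as e:
        page = wikipedia.page(e.options[0])   # first disambiguation option
    except wikipedia.PageError:
        results = wikipedia.search(query, results=3)
        if not results:
            return f"No Wikipedia results found for '{query}'"
        page = wikipedia.page(results[0])
    return f"{page.title}: {page.summary}"
```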
544
-
545
- @tool
546
- def data_processing_tool(data_description: str, operation: str) -> str:
547
- """
548
- Process and analyze data based on descriptions and operations.
549
- Useful for mathematical calculations, data analysis, and structured data processing.
550
- Args:
551
- data_description: Description of the data or data source
552
- operation: The operation to perform (calculate, analyze, extract, etc.)
553
- """
554
- try:
555
- logging.info(f"πŸ“Š Processing data: {data_description} | Operation: {operation}")
556
-
557
- # This tool is designed to work with the Python execution tool
558
- # for complex data processing tasks
559
- code_template = f"""
560
- # Data processing task: {operation}
561
- # Data description: {data_description}
562
-
563
- # Add your specific data processing logic here
564
- # This is a template - specific implementation depends on the data and operation
565
-
566
- print("Data processing task initiated")
567
- print(f"Description: {data_description}")
568
- print(f"Operation: {operation}")
569
-
570
- # Example operations:
571
- if "calculate" in "{operation}".lower():
572
- print("Performing calculation...")
573
- elif "analyze" in "{operation}".lower():
574
- print("Performing analysis...")
575
- elif "extract" in "{operation}".lower():
576
- print("Extracting information...")
577
-
578
- print("Task completed - use enhanced_python_execution for specific calculations")
579
- """
580
-
581
- return enhanced_python_execution(code_template)
582
-
583
- except Exception as e:
584
- error_msg = f"Data processing error: {e}"
585
- logging.error(error_msg)
586
- return error_msg
587
-
588
- # --- Model and Agent Setup ---
589
 
 
 
 
590
  try:
591
- # Use a more capable model for better performance
592
- model = InferenceClientModel(
593
- model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo", # Upgraded model
594
- token=api_keys['together'],
595
- provider="together"
596
- )
597
- logging.info("βœ… Model loaded successfully")
598
- except Exception as e:
599
- logging.error(f"Failed to load primary model, falling back: {e}")
600
- try:
601
- # Fallback model
602
- model = InferenceClientModel(
603
- model_id="Qwen/Qwen2.5-7B-Instruct",
604
- token=api_keys['together'],
605
- provider="together"
606
- )
607
- logging.info("βœ… Fallback model loaded successfully")
608
- except Exception as e2:
609
- logging.error(f"Failed to load fallback model: {e2}")
610
- raise
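The primary/fallback pair collapses naturally into a loop; `load_model` is a hypothetical name, while the model IDs, token, and provider come from the block above:

```python
def load_model(api_key: str) -> InferenceClientModel:
    candidates = ("meta-llama/Llama-3.1-70B-Instruct-Turbo", "Qwen/Qwen2.5-7B-Instruct")
    for model_id in candidates:
        try:
            model = InferenceClientModel(model_id=model_id, token=api_key, provider="together")
            logging.info(f"✅ Model {model_id} loaded successfully")
            return model
        except Exception as e:
            logging.error(f"Failed to load {model_id}: {e}")
    raise RuntimeError("No inference model could be loaded")
```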
611
-
612
- # Configure Google Search tool
613
- google_search_tool = None
614
- if api_keys['serpapi']:
615
- try:
616
- google_search_tool = GoogleSearchTool(
617
- provider='serpapi',
618
- serpapi_api_key=api_keys['serpapi']
619
- )
620
- logging.info("βœ… Google Search tool configured")
621
- except Exception as e:
622
- logging.warning(f"Failed to configure Google Search tool: {e}")
623
-
624
- # Prepare tools list
625
- tools_list = [
626
- enhanced_wikipedia_search,
627
- advanced_web_query,
628
- enhanced_youtube_query,
629
- enhanced_python_execution,
630
- data_processing_tool,
631
- ]
632
-
633
- if google_search_tool:
634
- tools_list.insert(0, google_search_tool)
635
-
636
- # Specialized worker agent with comprehensive toolset
637
- worker_agent = ToolCallingAgent(
638
- tools=tools_list,
639
- model=model,
640
- max_steps=8, # Increased for complex tasks
641
- name="gaia_specialist",
642
- description="Advanced specialist agent for GAIA benchmark: web research, document analysis, video processing, mathematical computation, and data analysis."
643
- )
644
-
645
- # Enhanced strategic manager agent
646
- manager_tools = []
647
- if google_search_tool:
648
- manager_tools.append(google_search_tool)
649
-
650
- manager = CodeAgent(
651
- model=model,
652
- managed_agents=[worker_agent],
653
- tools=manager_tools,
654
- instructions="""You are a general AI assistant designed for the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions that require deep reasoning and analysis.
655
-
656
- **CRITICAL: ANSWER FORMAT REQUIREMENT**
657
- You MUST finish your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
658
-
659
- YOUR FINAL ANSWER formatting rules:
660
- - For NUMBERS: No commas, no units (like $ or %), no additional text
661
- Example: "FINAL ANSWER: 42" NOT "FINAL ANSWER: 42 dollars" or "FINAL ANSWER: $42"
662
- - For STRINGS: No articles (a, an, the), no abbreviations, write digits in plain text
663
- Example: "FINAL ANSWER: New York City" NOT "FINAL ANSWER: NYC" or "FINAL ANSWER: The Big Apple"
664
- - For LISTS: Comma-separated, apply above rules to each element
665
- Example: "FINAL ANSWER: Paris, London, Berlin" or "FINAL ANSWER: 1.5, 2.3, 4.7"
666
-
667
- **STRATEGIC APPROACH:**
668
-
669
- 1. **ANALYZE THE QUESTION**: Determine what type of answer is expected (number, string, or list)
670
-
671
- 2. **DECOMPOSE THE PROBLEM**: Break complex questions into sub-problems:
672
- - Identify required information sources
673
- - Plan tool usage sequence
674
- - Consider verification steps
675
-
676
- 3. **TOOL SELECTION**:
677
- - Use GoogleSearchTool for current information and general web queries
678
- - Delegate to gaia_specialist for complex multi-tool analysis:
679
- * advanced_web_query: Deep webpage content analysis
680
- * enhanced_youtube_query: Video transcript analysis
681
- * enhanced_python_execution: Mathematical calculations and data processing
682
- * enhanced_wikipedia_search: Encyclopedic knowledge
683
- * data_processing_tool: Structured data analysis
684
-
685
- 4. **VERIFICATION**: Cross-check critical information and validate calculations
686
-
687
- **DELEGATION EXAMPLES**:
688
-
689
- Simple queries:
690
- ```python
691
- # Direct search for current information
692
- result = search_tool.run("population Tokyo 2024")
693
- # Extract and format the answer properly
694
- ```
695
-
696
- Complex analysis:
697
- ```python
698
- # Delegate comprehensive tasks to specialist
699
- answer = gaia_specialist.run('''
700
- Find the founding year of the company mentioned in this video: [URL],
701
- calculate years from founding to 2024,
702
- then identify a major historical event from that founding year.
703
- Format the final answer according to GAIA requirements.
704
- ''')
705
- ```
706
-
707
- **RESPONSE STRUCTURE**:
708
- 1. Show your reasoning and steps
709
- 2. Use tools to gather information
710
- 3. Verify your findings
711
- 4. Format the final answer correctly
712
- 5. End with "FINAL ANSWER: [answer]"
713
-
714
- **EXAMPLES OF PROPER FORMATTING**:
715
- - Question asks for a year: "FINAL ANSWER: 1991"
716
- - Question asks for a city: "FINAL ANSWER: San Francisco"
717
- - Question asks for a percentage: "FINAL ANSWER: 25" (not "25%" unless specified)
718
- - Question asks for a list of countries: "FINAL ANSWER: France, Germany, Italy"
719
- - Question asks for a calculation result: "FINAL ANSWER: 456"
720
-
721
- Remember: Be methodical, verify your information, and always end with the properly formatted FINAL ANSWER."""
722
- )
723
-
724
- logging.info("🎯 Enhanced GAIA agent initialized successfully!")
725
-
726
- # Return wrapped agent that ensures GAIA format compliance
727
- return create_gaia_agent_wrapper(manager)
728
 
729
- # --- Main Execution Block for Local Testing ---
730
 
731
- def main():
732
- """Test the agent with sample GAIA-style questions."""
733
- configure_logging()
734
- logging.info("πŸ§ͺ Starting local testing...")
735
-
736
- try:
737
- agent = initialize_agent()
738
- if not agent:
739
- logging.error("Agent initialization failed")
740
- return
741
 
742
- # More challenging test questions similar to GAIA
743
- test_questions = [
744
- "What is 15! / (12! * 3!) ?",
745
- "In what year was the Python programming language first released?",
746
- "What is the square root of 2,025?",
747
- ]
748
 
749
- for i, question in enumerate(test_questions, 1):
750
- logging.info(f"\n{'='*60}")
751
- logging.info(f"πŸ” Test Question {i}: {question}")
752
- logging.info('='*60)
753
-
754
- start_time = time.time()
755
- try:
756
- # The agent wrapper now handles GAIA format compliance
757
- response = agent(question)
758
- elapsed_time = time.time() - start_time
759
-
760
- logging.info(f"βœ… Final Answer: {response}")
761
- logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
762
-
763
- except Exception as e:
764
- logging.error(f"❌ Error processing question {i}: {e}")
765
-
766
- time.sleep(2) # Prevent rate limiting
767
-
768
- logging.info(f"\n{'='*60}")
769
- logging.info("🏁 Testing completed!")
770
- logging.info('='*60)
771
 
772
- except Exception as e:
773
- logging.critical(f"πŸ’₯ Critical error during testing: {e}", exc_info=True)
774
 
775
- if __name__ == "__main__":
776
- main(), answer:
777
- expected_type = "number" if ',' not in answer or answer.count(',') < 2 else "list"
778
- elif ',' in answer and len(answer.split(',')) > 1:
779
- expected_type = "list"
780
- else:
781
- expected_type = "string"
782
-
783
- if expected_type == "number":
784
- # Remove commas and units for numbers
785
- answer = re.sub(r'[,$%]', '', answer)
786
- answer = re.sub(r'\s+', '', answer)
787
- # Keep only number and decimal point
788
- number_match = re.search(r'[\d.-]+', answer)
789
- if number_match:
790
- return number_match.group(0)
791
-
792
- elif expected_type == "string":
793
- # Remove articles and normalize
794
- answer = re.sub(r'\b(a|an|the)\s+', '', answer, flags=re.IGNORECASE)
795
- answer = re.sub(r'\s+', ' ', answer).strip()
796
- # Expand common abbreviations
797
- abbreviations = {
798
- 'NYC': 'New York City',
799
- 'LA': 'Los Angeles',
800
- 'SF': 'San Francisco',
801
- 'US': 'United States',
802
- 'UK': 'United Kingdom',
803
- 'EU': 'European Union'
804
- }
805
- for abbr, full in abbreviations.items():
806
- if answer.upper() == abbr:
807
- answer = full
808
- break
809
-
810
- elif expected_type == "list":
811
- # Process each element in the list
812
- elements = [elem.strip() for elem in answer.split(',')]
813
- normalized_elements = []
814
- for elem in elements:
815
- if re.match(r'^[\d.-]+', elem):
816
 
817
  def initialize_agent():
818
- """
819
- Initializes an enhanced multi-disciplinary agent optimized for GAIA benchmark questions.
820
- """
821
  configure_logging()
822
  logging.info("πŸš€ Starting GAIA agent initialization...")
823
 
824
  try:
825
  api_keys = load_api_keys()
826
- except Exception as e:
827
- logging.error(f"Failed to load API keys: {e}")
828
  return None
829
-
830
- # --- Enhanced Caching Layer for LlamaIndex ---
831
- @lru_cache(maxsize=64) # Increased cache size
832
- @retry(max_retries=3)
833
- def get_webpage_index(url: str) -> VectorStoreIndex:
834
- logging.info(f"πŸ“„ Indexing webpage: {url}")
835
- try:
836
- loader_cls = download_loader("BeautifulSoupWebReader")
837
- loader = loader_cls()
838
- docs = loader.load_data(urls=[url])
839
- if not docs:
840
- raise ValueError(f"No content could be extracted from {url}")
841
-
842
- # Filter out very short documents
843
- valid_docs = [doc for doc in docs if len(doc.text.strip()) > 50]
844
- if not valid_docs:
845
- raise ValueError(f"No substantial content found in {url}")
846
-
847
- return VectorStoreIndex.from_documents(valid_docs)
848
- except Exception as e:
849
- logging.error(f"Error indexing webpage {url}: {e}")
850
- raise
851
 
852
- @lru_cache(maxsize=32)
853
- @retry(max_retries=3)
854
- def get_youtube_index(video_id: str) -> VectorStoreIndex:
855
- logging.info(f"πŸŽ₯ Indexing YouTube video: {video_id}")
856
- try:
857
- # Try to get English transcript first
858
- try:
859
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
860
- except (TranscriptsDisabled, NoTranscriptFound):
861
- # Try auto-generated or any available transcript
862
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
863
- try:
864
- transcript = transcript_list.find_transcript(['en']).fetch()
865
- except:
866
- # Get any available transcript
867
- available_transcripts = list(transcript_list)
868
- if not available_transcripts:
869
- raise YouTubeTranscriptApiError(f"No transcripts available for video {video_id}")
870
- transcript = available_transcripts[0].fetch()
871
-
872
- if not transcript:
873
- raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
874
-
875
- # Combine transcript with timestamps for better context
876
- text_segments = []
877
- for entry in transcript:
878
- timestamp = int(entry.get('start', 0))
879
- text = entry.get('text', '').strip()
880
- if text:
881
- text_segments.append(f"[{timestamp}s] {text}")
882
-
883
- full_text = ' '.join(text_segments)
884
- if not full_text.strip():
885
- raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
886
-
887
- doc = Document(
888
- text=full_text,
889
- doc_id=f"youtube_{video_id}",
890
- metadata={"source": f"https://youtube.com/watch?v={video_id}"}
891
- )
892
- return VectorStoreIndex.from_documents([doc])
893
-
894
- except Exception as e:
895
- logging.error(f"Error indexing YouTube video {video_id}: {e}")
896
- raise
897
-
898
- # --- Enhanced Tool Definitions ---
899
 
900
- @tool
901
- def advanced_web_query(url: str, query: str) -> str:
902
- """
903
- Extract specific information from a webpage using advanced querying.
904
- Handles various content types and provides detailed responses.
905
- Args:
906
- url: The webpage URL to analyze
907
- query: Specific question to ask about the content
908
- """
909
- try:
910
- if not url.startswith(('http://', 'https://')):
911
- url = 'https://' + url
912
-
913
- logging.info(f"πŸ” Querying webpage: {url} with query: {query}")
914
- index = get_webpage_index(url)
915
- query_engine = index.as_query_engine(
916
- similarity_top_k=8, # Increased for better coverage
917
- response_mode="tree_summarize",
918
- verbose=True
919
- )
920
-
921
- response = query_engine.query(query)
922
- result = clean_text_output(str(response))
923
-
924
- # If the response seems incomplete, try a broader query
925
- if len(result) < 50 and "not found" not in result.lower():
926
- broader_query = f"Information about {query.split()[-1] if query.split() else query}"
927
- broader_response = query_engine.query(broader_query)
928
- broader_result = clean_text_output(str(broader_response))
929
- if len(broader_result) > len(result):
930
- result = broader_result
931
-
932
- return result
933
-
934
- except Exception as e:
935
- error_msg = f"Error querying webpage {url}: {e}"
936
- logging.error(error_msg)
937
- return error_msg
938
-
939
- @tool
940
- def enhanced_youtube_query(video_url_or_id: str, query: str) -> str:
941
- """
942
- Extract information from YouTube video transcripts with enhanced processing.
943
- Handles timestamps and provides contextual responses.
944
- Args:
945
- video_url_or_id: YouTube URL or video ID
946
- query: Specific question about the video content
947
- """
948
- try:
949
- video_id = extract_video_id(video_url_or_id)
950
- if not video_id:
951
- return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
952
-
953
- logging.info(f"🎬 Querying YouTube video: {video_id} with query: {query}")
954
- index = get_youtube_index(video_id)
955
- query_engine = index.as_query_engine(
956
- similarity_top_k=6,
957
- response_mode="tree_summarize",
958
- verbose=True
959
- )
960
-
961
- response = query_engine.query(query)
962
- result = clean_text_output(str(response))
963
-
964
- return result
965
-
966
- except YouTubeTranscriptApiError as e:
967
- error_msg = f"YouTube transcript error for {video_url_or_id}: {e}"
968
- logging.error(error_msg)
969
- return error_msg
970
- except Exception as e:
971
- error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
972
- logging.error(error_msg)
973
- return error_msg
974
 
975
  @tool
976
  def enhanced_python_execution(code: str) -> str:
977
- """
978
- Execute Python code with enhanced capabilities and error handling.
979
- Includes mathematical, data processing, and web scraping capabilities.
980
- Args:
981
- code: Python code to execute
982
- """
983
- # Expanded safe globals with more libraries
984
- safe_globals = {}
985
- try:
986
- # Basic Python modules
987
- import math, datetime, json, re, collections, itertools, random
988
- from fractions import Fraction
989
- from decimal import Decimal
990
- import statistics
991
-
992
- safe_globals.update({
993
- 'math': math, 'datetime': datetime, 'json': json, 're': re,
994
- 'collections': collections, 'itertools': itertools, 'random': random,
995
- 'Fraction': Fraction, 'Decimal': Decimal, 'statistics': statistics
996
- })
997
-
998
- # Scientific computing
999
- try:
1000
- import numpy as np
1001
- safe_globals['np'] = np
1002
- safe_globals['numpy'] = np
1003
- except ImportError:
1004
- logging.warning("NumPy not available")
1005
-
1006
- try:
1007
- import pandas as pd
1008
- safe_globals['pd'] = pd
1009
- safe_globals['pandas'] = pd
1010
- except ImportError:
1011
- logging.warning("Pandas not available")
1012
-
1013
- # Web requests for data fetching
1014
- try:
1015
- import requests
1016
- safe_globals['requests'] = requests
1017
- except ImportError:
1018
- logging.warning("Requests not available")
1019
-
1020
- except ImportError as e:
1021
- logging.warning(f"Some modules not available: {e}")
1022
-
1023
- # Capture both stdout and stderr
1024
  stdout_capture = io.StringIO()
1025
- stderr_capture = io.StringIO()
1026
-
1027
  try:
1028
- logging.info(f"🐍 Executing Python code: {code[:100]}...")
1029
-
1030
- with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
1031
- # Use exec with restricted builtins for safety
1032
- restricted_builtins = {
1033
- 'abs': abs, 'all': all, 'any': any, 'bin': bin, 'bool': bool,
1034
- 'chr': chr, 'dict': dict, 'dir': dir, 'divmod': divmod,
1035
- 'enumerate': enumerate, 'filter': filter, 'float': float,
1036
- 'format': format, 'hex': hex, 'int': int, 'len': len,
1037
- 'list': list, 'map': map, 'max': max, 'min': min, 'oct': oct,
1038
- 'ord': ord, 'pow': pow, 'print': print, 'range': range,
1039
- 'repr': repr, 'reversed': reversed, 'round': round,
1040
- 'set': set, 'sorted': sorted, 'str': str, 'sum': sum,
1041
- 'tuple': tuple, 'type': type, 'zip': zip,
1042
- }
1043
-
1044
  exec(code, {"__builtins__": restricted_builtins}, safe_globals)
1045
 
1046
- stdout_result = stdout_capture.getvalue()
1047
- stderr_result = stderr_capture.getvalue()
1048
-
1049
- # Combine outputs
1050
- result_parts = []
1051
- if stdout_result.strip():
1052
- result_parts.append(stdout_result.strip())
1053
- if stderr_result.strip():
1054
- result_parts.append(f"Warnings/Errors: {stderr_result.strip()}")
1055
-
1056
- if result_parts:
1057
- return '\n'.join(result_parts)
1058
- else:
1059
- return "Code executed successfully (no output)"
1060
-
1061
  except Exception as e:
1062
  error_msg = f"Code execution error: {e}"
1063
- stderr_result = stderr_capture.getvalue()
1064
- if stderr_result.strip():
1065
- error_msg += f"\nAdditional details: {stderr_result.strip()}"
1066
- logging.error(error_msg)
1067
- return error_msg
1068
-
1069
- @tool
1070
- def enhanced_wikipedia_search(query: str, detailed: bool = True) -> str:
1071
- """
1072
- Search Wikipedia with enhanced content extraction and error handling.
1073
- Args:
1074
- query: Search term
1075
- detailed: Whether to return detailed information or just summary
1076
- """
1077
- try:
1078
- import wikipedia
1079
- wikipedia.set_lang("en")
1080
- wikipedia.set_rate_limiting(True)
1081
-
1082
- logging.info(f"πŸ“š Searching Wikipedia for: {query}")
1083
-
1084
- # Handle disambiguation and search suggestions
1085
- try:
1086
- page = wikipedia.page(query, auto_suggest=True)
1087
- except wikipedia.DisambiguationError as e:
1088
- # Take the first option from disambiguation
1089
- if e.options:
1090
- page = wikipedia.page(e.options[0])
1091
- else:
1092
- return f"Wikipedia disambiguation error for '{query}': {e}"
1093
- except wikipedia.PageError:
1094
- # Try searching if direct page lookup fails
1095
- search_results = wikipedia.search(query, results=3)
1096
- if search_results:
1097
- page = wikipedia.page(search_results[0])
1098
- else:
1099
- return f"No Wikipedia results found for '{query}'"
1100
-
1101
- if detailed:
1102
- # Get more comprehensive content
1103
- content_sections = []
1104
- content_sections.append(f"**{page.title}**")
1105
- content_sections.append(f"Summary: {page.summary}")
1106
-
1107
- # Add first few sections if available
1108
- if hasattr(page, 'content') and page.content:
1109
- sections = page.content.split('\n\n')[:3] # First 3 paragraphs
1110
- for section in sections:
1111
- if section.strip() and len(section) > 50:
1112
- content_sections.append(section.strip())
1113
-
1114
- content_sections.append(f"Source: {page.url}")
1115
- return '\n\n'.join(content_sections)
1116
- else:
1117
- return f"**{page.title}**\n\n{page.summary}\n\nSource: {page.url}"
1118
-
1119
- except ImportError:
1120
- return "Wikipedia library not installed. Cannot perform search."
1121
- except Exception as e:
1122
- error_msg = f"Wikipedia search error for '{query}': {e}"
1123
- logging.error(error_msg)
1124
- return error_msg
1125
-
1126
- @tool
1127
- def data_processing_tool(data_description: str, operation: str) -> str:
1128
- """
1129
- Process and analyze data based on descriptions and operations.
1130
- Useful for mathematical calculations, data analysis, and structured data processing.
1131
- Args:
1132
- data_description: Description of the data or data source
1133
- operation: The operation to perform (calculate, analyze, extract, etc.)
1134
- """
1135
- try:
1136
- logging.info(f"πŸ“Š Processing data: {data_description} | Operation: {operation}")
1137
-
1138
- # This tool is designed to work with the Python execution tool
1139
- # for complex data processing tasks
1140
- code_template = f"""
1141
- # Data processing task: {operation}
1142
- # Data description: {data_description}
1143
-
1144
- # Add your specific data processing logic here
1145
- # This is a template - specific implementation depends on the data and operation
1146
-
1147
- print("Data processing task initiated")
1148
- print(f"Description: {data_description}")
1149
- print(f"Operation: {operation}")
1150
-
1151
- # Example operations:
1152
- if "calculate" in "{operation}".lower():
1153
- print("Performing calculation...")
1154
- elif "analyze" in "{operation}".lower():
1155
- print("Performing analysis...")
1156
- elif "extract" in "{operation}".lower():
1157
- print("Extracting information...")
1158
-
1159
- print("Task completed - use enhanced_python_execution for specific calculations")
1160
- """
1161
-
1162
- return enhanced_python_execution(code_template)
1163
-
1164
- except Exception as e:
1165
- error_msg = f"Data processing error: {e}"
1166
  logging.error(error_msg)
1167
  return error_msg
1168
 
1169
  # --- Model and Agent Setup ---
1170
 
1171
  try:
1172
- # Use a more capable model for better performance
1173
  model = InferenceClientModel(
1174
- model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo", # Upgraded model
1175
  token=api_keys['together'],
1176
  provider="together"
1177
  )
1178
- logging.info("βœ… Model loaded successfully")
1179
- except Exception as e:
1180
- logging.error(f"Failed to load primary model, falling back: {e}")
1181
- try:
1182
- # Fallback model
1183
- model = InferenceClientModel(
1184
- model_id="Qwen/Qwen2.5-7B-Instruct",
1185
- token=api_keys['together'],
1186
- provider="together"
1187
- )
1188
- logging.info("βœ… Fallback model loaded successfully")
1189
- except Exception as e2:
1190
- logging.error(f"Failed to load fallback model: {e2}")
1191
- raise
1192
-
1193
- # Configure Google Search tool
1194
- google_search_tool = None
1195
- if api_keys['serpapi']:
1196
- try:
1197
- google_search_tool = GoogleSearchTool(
1198
- provider='serpapi',
1199
- serpapi_api_key=api_keys['serpapi']
1200
- )
1201
- logging.info("βœ… Google Search tool configured")
1202
- except Exception as e:
1203
- logging.warning(f"Failed to configure Google Search tool: {e}")
1204
-
1205
- # Prepare tools list
1206
- tools_list = [
1207
- enhanced_wikipedia_search,
1208
- advanced_web_query,
1209
- enhanced_youtube_query,
1210
- enhanced_python_execution,
1211
- data_processing_tool,
1212
- ]
1213
-
1214
- if google_search_tool:
1215
- tools_list.insert(0, google_search_tool)
1216
-
1217
- # Specialized worker agent with comprehensive toolset
1218
- worker_agent = ToolCallingAgent(
1219
- tools=tools_list,
1220
- model=model,
1221
- max_steps=8, # Increased for complex tasks
1222
- name="gaia_specialist",
1223
- description="Advanced specialist agent for GAIA benchmark: web research, document analysis, video processing, mathematical computation, and data analysis."
1224
- )
1225
-
1226
- # Enhanced strategic manager agent
1227
- manager_tools = []
1228
- if google_search_tool:
1229
- manager_tools.append(google_search_tool)
1230
-
1231
- manager = CodeAgent(
1232
- model=model,
1233
- managed_agents=[worker_agent],
1234
- tools=manager_tools,
1235
- instructions="""You are a general AI assistant designed for the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions that require deep reasoning and analysis.
1236
-
1237
- **CRITICAL: ANSWER FORMAT REQUIREMENT**
1238
- You MUST finish your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
1239
-
1240
- YOUR FINAL ANSWER formatting rules:
1241
- - For NUMBERS: No commas, no units (like $ or %), no additional text
1242
- Example: "FINAL ANSWER: 42" NOT "FINAL ANSWER: 42 dollars" or "FINAL ANSWER: $42"
1243
- - For STRINGS: No articles (a, an, the), no abbreviations, write digits in plain text
1244
- Example: "FINAL ANSWER: New York City" NOT "FINAL ANSWER: NYC" or "FINAL ANSWER: The Big Apple"
1245
- - For LISTS: Comma-separated, apply above rules to each element
1246
- Example: "FINAL ANSWER: Paris, London, Berlin" or "FINAL ANSWER: 1.5, 2.3, 4.7"
1247
-
1248
- **STRATEGIC APPROACH:**
1249
-
1250
- 1. **ANALYZE THE QUESTION**: Determine what type of answer is expected (number, string, or list)
1251
-
1252
- 2. **DECOMPOSE THE PROBLEM**: Break complex questions into sub-problems:
1253
- - Identify required information sources
1254
- - Plan tool usage sequence
1255
- - Consider verification steps
1256
-
1257
- 3. **TOOL SELECTION**:
1258
- - Use GoogleSearchTool for current information and general web queries
1259
- - Delegate to gaia_specialist for complex multi-tool analysis:
1260
- * advanced_web_query: Deep webpage content analysis
1261
- * enhanced_youtube_query: Video transcript analysis
1262
- * enhanced_python_execution: Mathematical calculations and data processing
1263
- * enhanced_wikipedia_search: Encyclopedic knowledge
1264
- * data_processing_tool: Structured data analysis
1265
-
1266
- 4. **VERIFICATION**: Cross-check critical information and validate calculations
1267
-
1268
- **DELEGATION EXAMPLES**:
1269
-
1270
- Simple queries:
1271
- ```python
1272
- # Direct search for current information
1273
- result = search_tool.run("population Tokyo 2024")
1274
- # Extract and format the answer properly
1275
- ```
1276
-
1277
- Complex analysis:
1278
- ```python
1279
- # Delegate comprehensive tasks to specialist
1280
- answer = gaia_specialist.run('''
1281
- Find the founding year of the company mentioned in this video: [URL],
1282
- calculate years from founding to 2024,
1283
- then identify a major historical event from that founding year.
1284
- Format the final answer according to GAIA requirements.
1285
- ''')
1286
- ```
1287
-
1288
- **RESPONSE STRUCTURE**:
1289
- 1. Show your reasoning and steps
1290
- 2. Use tools to gather information
1291
- 3. Verify your findings
1292
- 4. Format the final answer correctly
1293
- 5. End with "FINAL ANSWER: [answer]"
1294
-
1295
- **EXAMPLES OF PROPER FORMATTING**:
1296
- - Question asks for a year: "FINAL ANSWER: 1991"
1297
- - Question asks for a city: "FINAL ANSWER: San Francisco"
1298
- - Question asks for a percentage: "FINAL ANSWER: 25" (not "25%" unless specified)
1299
- - Question asks for a list of countries: "FINAL ANSWER: France, Germany, Italy"
1300
- - Question asks for a calculation result: "FINAL ANSWER: 456"
1301
-
1302
- Remember: Be methodical, verify your information, and always end with the properly formatted FINAL ANSWER."""
1303
- )
1304
-
1305
- logging.info("🎯 Enhanced GAIA agent initialized successfully!")
1306
- return manager
1307
-
1308
- # --- Main Execution Block for Local Testing ---
1309
-
1310
- def main():
1311
- """Test the agent with sample GAIA-style questions."""
1312
- configure_logging()
1313
- logging.info("πŸ§ͺ Starting local testing...")
1314
-
1315
- try:
1316
- agent = initialize_agent()
1317
- if not agent:
1318
- logging.error("Agent initialization failed")
1319
- return
1320
-
1321
- # More challenging test questions similar to GAIA
1322
- test_questions = [
1323
- "What is 15! / (12! * 3!) ?",
1324
- "In what year was the Python programming language first released?",
1325
- "What is the square root of 2,025?",
1326
- ]
1327
-
1328
- for i, question in enumerate(test_questions, 1):
1329
- logging.info(f"\n{'='*60}")
1330
- logging.info(f"πŸ” Test Question {i}: {question}")
1331
- logging.info('='*60)
1332
-
1333
- start_time = time.time()
1334
- try:
1335
- response = agent.run(question)
1336
- elapsed_time = time.time() - start_time
1337
-
1338
- logging.info(f"βœ… Agent Answer: {response}")
1339
- logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
1340
-
1341
- except Exception as e:
1342
- logging.error(f"❌ Error processing question {i}: {e}")
1343
-
1344
- time.sleep(2) # Prevent rate limiting
1345
-
1346
- logging.info(f"\n{'='*60}")
1347
- logging.info("🏁 Testing completed!")
1348
- logging.info('='*60)
1349
-
1350
  except Exception as e:
1351
- logging.critical(f"πŸ’₯ Critical error during testing: {e}", exc_info=True)
1352
-
1353
- if __name__ == "__main__":
1354
- main()
- if re.match(r'^[\d.-]+', elem):
1355
- # It's a number
1356
- normalized_elements.append(normalize_answer_format(elem, "number"))
1357
- else:
1358
- # It's a string
1359
- normalized_elements.append(normalize_answer_format(elem, "string"))
1360
- return ', '.join(normalized_elements)
1361
-
1362
- return answer
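Expected behaviour of the removed normaliser, given the branches visible above; the corrupted file loses the exact number-detection line, so the first example is an assumption:

```python
print(normalize_answer_format("3.14"))                   # -> 3.14  (number branch, assumed detection)
print(normalize_answer_format("NYC"))                    # -> New York City  (abbreviation table)
print(normalize_answer_format("The United States"))      # -> United States  (articles stripped)
print(normalize_answer_format("Paris, London, Berlin"))  # -> Paris, London, Berlin  (per-element rules)
```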
1363
-
1364
- # --- Enhanced Agent Wrapper ---
1365
-
1366
- def create_gaia_agent_wrapper(agent):
1367
- """
1368
- Create a wrapper around the agent that ensures GAIA format compliance.
1369
- """
1370
- def gaia_agent_run(question: str) -> str:
1371
- """
1372
- Run the agent with GAIA format compliance.
1373
- Returns only the final answer in the correct format.
1374
- """
1375
- try:
1376
- # Add explicit formatting instruction to the question
1377
- formatted_question = f"""
1378
- {question}
1379
-
1380
- Remember to end your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
1381
-
1382
- Follow GAIA formatting rules:
1383
- - Numbers: No commas, no units (unless specified)
1384
- - Strings: No articles, no abbreviations, digits in plain text
1385
- - Lists: Comma-separated following above rules for each element
1386
- """
1387
-
1388
- # Get the full response from the agent
1389
- full_response = agent.run(formatted_question)
1390
-
1391
- # Extract and normalize the final answer
1392
- final_answer = extract_final_answer(full_response)
1393
- normalized_answer = normalize_answer_format(final_answer)
1394
-
1395
- logging.info(f"🎯 Question: {question}")
1396
- logging.info(f"πŸ€– Full response: {full_response}")
1397
- logging.info(f"βœ… Final answer: {normalized_answer}")
1398
-
1399
- return normalized_answer
1400
-
1401
- except Exception as e:
1402
- error_msg = f"Agent execution error: {e}"
1403
- logging.error(error_msg)
1404
- return f"ERROR: {e}"
1405
-
1406
- return gaia_agent_run
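The wrapper returns a bare callable rather than an agent object, which is why the first deleted test harness calls `agent(question)` instead of `agent.run(question)`:

```python
agent = initialize_agent()   # returns gaia_agent_run, a plain function
if agent:
    answer = agent("In what year was the Python programming language first released?")
    print(answer)            # e.g. "1991" after FINAL ANSWER extraction and normalization
```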
1407
-
1408
- def initialize_agent():
1409
- """
1410
- Initializes an enhanced multi-disciplinary agent optimized for GAIA benchmark questions.
1411
- """
1412
- configure_logging()
1413
- logging.info("πŸš€ Starting GAIA agent initialization...")
1414
-
1415
- try:
1416
- api_keys = load_api_keys()
1417
- except Exception as e:
1418
- logging.error(f"Failed to load API keys: {e}")
1419
- return None
1420
-
1421
- # --- Enhanced Caching Layer for LlamaIndex ---
1422
- @lru_cache(maxsize=64) # Increased cache size
1423
- @retry(max_retries=3)
1424
- def get_webpage_index(url: str) -> VectorStoreIndex:
1425
- logging.info(f"πŸ“„ Indexing webpage: {url}")
1426
- try:
1427
- loader_cls = download_loader("BeautifulSoupWebReader")
1428
- loader = loader_cls()
1429
- docs = loader.load_data(urls=[url])
1430
- if not docs:
1431
- raise ValueError(f"No content could be extracted from {url}")
1432
-
1433
- # Filter out very short documents
1434
- valid_docs = [doc for doc in docs if len(doc.text.strip()) > 50]
1435
- if not valid_docs:
1436
- raise ValueError(f"No substantial content found in {url}")
1437
-
1438
- return VectorStoreIndex.from_documents(valid_docs)
1439
- except Exception as e:
1440
- logging.error(f"Error indexing webpage {url}: {e}")
1441
- raise
1442
-
1443
- @lru_cache(maxsize=32)
1444
- @retry(max_retries=3)
1445
- def get_youtube_index(video_id: str) -> VectorStoreIndex:
1446
- logging.info(f"πŸŽ₯ Indexing YouTube video: {video_id}")
1447
- try:
1448
- # Try to get English transcript first
1449
- try:
1450
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
1451
- except (TranscriptsDisabled, NoTranscriptFound):
1452
- # Try auto-generated or any available transcript
1453
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
1454
- try:
1455
- transcript = transcript_list.find_transcript(['en']).fetch()
1456
- except:
1457
- # Get any available transcript
1458
- available_transcripts = list(transcript_list)
1459
- if not available_transcripts:
1460
- raise YouTubeTranscriptApiError(f"No transcripts available for video {video_id}")
1461
- transcript = available_transcripts[0].fetch()
1462
-
1463
- if not transcript:
1464
- raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
1465
-
1466
- # Combine transcript with timestamps for better context
1467
- text_segments = []
1468
- for entry in transcript:
1469
- timestamp = int(entry.get('start', 0))
1470
- text = entry.get('text', '').strip()
1471
- if text:
1472
- text_segments.append(f"[{timestamp}s] {text}")
1473
-
1474
- full_text = ' '.join(text_segments)
1475
- if not full_text.strip():
1476
- raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
1477
-
1478
- doc = Document(
1479
- text=full_text,
1480
- doc_id=f"youtube_{video_id}",
1481
- metadata={"source": f"https://youtube.com/watch?v={video_id}"}
1482
- )
1483
- return VectorStoreIndex.from_documents([doc])
1484
-
1485
- except Exception as e:
1486
- logging.error(f"Error indexing YouTube video {video_id}: {e}")
1487
- raise
1488
-
1489
- # --- Enhanced Tool Definitions ---
1490
-
1491
- @tool
1492
- def advanced_web_query(url: str, query: str) -> str:
1493
- """
1494
- Extract specific information from a webpage using advanced querying.
1495
- Handles various content types and provides detailed responses.
1496
- Args:
1497
- url: The webpage URL to analyze
1498
- query: Specific question to ask about the content
1499
- """
1500
- try:
1501
- if not url.startswith(('http://', 'https://')):
1502
- url = 'https://' + url
1503
-
1504
- logging.info(f"πŸ” Querying webpage: {url} with query: {query}")
1505
- index = get_webpage_index(url)
1506
- query_engine = index.as_query_engine(
1507
- similarity_top_k=8, # Increased for better coverage
1508
- response_mode="tree_summarize",
1509
- verbose=True
1510
- )
1511
-
1512
- response = query_engine.query(query)
1513
- result = clean_text_output(str(response))
1514
-
1515
- # If the response seems incomplete, try a broader query
1516
- if len(result) < 50 and "not found" not in result.lower():
1517
- broader_query = f"Information about {query.split()[-1] if query.split() else query}"
1518
- broader_response = query_engine.query(broader_query)
1519
- broader_result = clean_text_output(str(broader_response))
1520
- if len(broader_result) > len(result):
1521
- result = broader_result
1522
-
1523
- return result
1524
-
1525
- except Exception as e:
1526
- error_msg = f"Error querying webpage {url}: {e}"
1527
- logging.error(error_msg)
1528
- return error_msg
1529
-
1530
- @tool
1531
- def enhanced_youtube_query(video_url_or_id: str, query: str) -> str:
1532
- """
1533
- Extract information from YouTube video transcripts with enhanced processing.
1534
- Handles timestamps and provides contextual responses.
1535
- Args:
1536
- video_url_or_id: YouTube URL or video ID
1537
- query: Specific question about the video content
1538
- """
1539
- try:
1540
- video_id = extract_video_id(video_url_or_id)
1541
- if not video_id:
1542
- return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
1543
-
1544
- logging.info(f"🎬 Querying YouTube video: {video_id} with query: {query}")
1545
- index = get_youtube_index(video_id)
1546
- query_engine = index.as_query_engine(
1547
- similarity_top_k=6,
1548
- response_mode="tree_summarize",
1549
- verbose=True
1550
- )
1551
-
1552
- response = query_engine.query(query)
1553
- result = clean_text_output(str(response))
1554
-
1555
- return result
1556
-
1557
- except YouTubeTranscriptApiError as e:
1558
- error_msg = f"YouTube transcript error for {video_url_or_id}: {e}"
1559
- logging.error(error_msg)
1560
- return error_msg
1561
- except Exception as e:
1562
- error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
1563
- logging.error(error_msg)
1564
- return error_msg
1565
-
1566
- @tool
1567
- def enhanced_python_execution(code: str) -> str:
1568
- """
1569
- Execute Python code with enhanced capabilities and error handling.
1570
- Includes mathematical, data processing, and web scraping capabilities.
1571
- Args:
1572
- code: Python code to execute
1573
- """
1574
- # Expanded safe globals with more libraries
1575
- safe_globals = {}
1576
- try:
1577
- # Basic Python modules
1578
- import math, datetime, json, re, collections, itertools, random
1579
- from fractions import Fraction
1580
- from decimal import Decimal
1581
- import statistics
1582
-
1583
- safe_globals.update({
1584
- 'math': math, 'datetime': datetime, 'json': json, 're': re,
1585
- 'collections': collections, 'itertools': itertools, 'random': random,
1586
- 'Fraction': Fraction, 'Decimal': Decimal, 'statistics': statistics
1587
- })
1588
-
1589
- # Scientific computing
1590
- try:
1591
- import numpy as np
1592
- safe_globals['np'] = np
1593
- safe_globals['numpy'] = np
1594
- except ImportError:
1595
- logging.warning("NumPy not available")
1596
-
1597
- try:
1598
- import pandas as pd
1599
- safe_globals['pd'] = pd
1600
- safe_globals['pandas'] = pd
1601
- except ImportError:
1602
- logging.warning("Pandas not available")
1603
-
1604
- # Web requests for data fetching
1605
- try:
1606
- import requests
1607
- safe_globals['requests'] = requests
1608
- except ImportError:
1609
- logging.warning("Requests not available")
1610
-
1611
- except ImportError as e:
1612
- logging.warning(f"Some modules not available: {e}")
1613
-
1614
- # Capture both stdout and stderr
1615
- stdout_capture = io.StringIO()
1616
- stderr_capture = io.StringIO()
1617
-
1618
- try:
1619
- logging.info(f"🐍 Executing Python code: {code[:100]}...")
1620
-
1621
- with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
1622
- # Use exec with restricted builtins for safety
1623
- restricted_builtins = {
1624
- 'abs': abs, 'all': all, 'any': any, 'bin': bin, 'bool': bool,
1625
- 'chr': chr, 'dict': dict, 'dir': dir, 'divmod': divmod,
1626
- 'enumerate': enumerate, 'filter': filter, 'float': float,
1627
- 'format': format, 'hex': hex, 'int': int, 'len': len,
1628
- 'list': list, 'map': map, 'max': max, 'min': min, 'oct': oct,
1629
- 'ord': ord, 'pow': pow, 'print': print, 'range': range,
1630
- 'repr': repr, 'reversed': reversed, 'round': round,
1631
- 'set': set, 'sorted': sorted, 'str': str, 'sum': sum,
1632
- 'tuple': tuple, 'type': type, 'zip': zip,
1633
- }
1634
-
1635
- exec(code, {"__builtins__": restricted_builtins}, safe_globals)
1636
-
1637
- stdout_result = stdout_capture.getvalue()
1638
- stderr_result = stderr_capture.getvalue()
1639
-
1640
- # Combine outputs
1641
- result_parts = []
1642
- if stdout_result.strip():
1643
- result_parts.append(stdout_result.strip())
1644
- if stderr_result.strip():
1645
- result_parts.append(f"Warnings/Errors: {stderr_result.strip()}")
1646
-
1647
- if result_parts:
1648
- return '\n'.join(result_parts)
1649
- else:
1650
- return "Code executed successfully (no output)"
1651
-
1652
- except Exception as e:
1653
- error_msg = f"Code execution error: {e}"
1654
- stderr_result = stderr_capture.getvalue()
1655
- if stderr_result.strip():
1656
- error_msg += f"\nAdditional details: {stderr_result.strip()}"
1657
- logging.error(error_msg)
1658
- return error_msg
1659
-
1660
- @tool
1661
- def enhanced_wikipedia_search(query: str, detailed: bool = True) -> str:
1662
- """
1663
- Search Wikipedia with enhanced content extraction and error handling.
1664
- Args:
1665
- query: Search term
1666
- detailed: Whether to return detailed information or just summary
1667
- """
1668
- try:
1669
- import wikipedia
1670
- wikipedia.set_lang("en")
1671
- wikipedia.set_rate_limiting(True)
1672
-
1673
- logging.info(f"πŸ“š Searching Wikipedia for: {query}")
1674
-
1675
- # Handle disambiguation and search suggestions
1676
- try:
1677
- page = wikipedia.page(query, auto_suggest=True)
1678
- except wikipedia.DisambiguationError as e:
1679
- # Take the first option from disambiguation
1680
- if e.options:
1681
- page = wikipedia.page(e.options[0])
1682
- else:
1683
- return f"Wikipedia disambiguation error for '{query}': {e}"
1684
- except wikipedia.PageError:
1685
- # Try searching if direct page lookup fails
1686
- search_results = wikipedia.search(query, results=3)
1687
- if search_results:
1688
- page = wikipedia.page(search_results[0])
1689
- else:
1690
- return f"No Wikipedia results found for '{query}'"
1691
-
1692
- if detailed:
1693
- # Get more comprehensive content
1694
- content_sections = []
1695
- content_sections.append(f"**{page.title}**")
1696
- content_sections.append(f"Summary: {page.summary}")
1697
-
1698
- # Add first few sections if available
1699
- if hasattr(page, 'content') and page.content:
1700
- sections = page.content.split('\n\n')[:3] # First 3 paragraphs
1701
- for section in sections:
1702
- if section.strip() and len(section) > 50:
1703
- content_sections.append(section.strip())
1704
-
1705
- content_sections.append(f"Source: {page.url}")
1706
- return '\n\n'.join(content_sections)
1707
- else:
1708
- return f"**{page.title}**\n\n{page.summary}\n\nSource: {page.url}"
1709
-
1710
- except ImportError:
1711
- return "Wikipedia library not installed. Cannot perform search."
1712
- except Exception as e:
1713
- error_msg = f"Wikipedia search error for '{query}': {e}"
1714
- logging.error(error_msg)
1715
- return error_msg
1716
-
1717
- @tool
1718
- def data_processing_tool(data_description: str, operation: str) -> str:
1719
- """
1720
- Process and analyze data based on descriptions and operations.
1721
- Useful for mathematical calculations, data analysis, and structured data processing.
1722
- Args:
1723
- data_description: Description of the data or data source
1724
- operation: The operation to perform (calculate, analyze, extract, etc.)
1725
- """
1726
- try:
1727
- logging.info(f"πŸ“Š Processing data: {data_description} | Operation: {operation}")
1728
-
1729
- # This tool is designed to work with the Python execution tool
1730
- # for complex data processing tasks
1731
- code_template = f"""
1732
- # Data processing task: {operation}
1733
- # Data description: {data_description}
1734
-
1735
- # Add your specific data processing logic here
1736
- # This is a template - specific implementation depends on the data and operation
1737
-
1738
- print("Data processing task initiated")
1739
- print(f"Description: {data_description}")
1740
- print(f"Operation: {operation}")
1741
-
1742
- # Example operations:
1743
- if "calculate" in "{operation}".lower():
1744
- print("Performing calculation...")
1745
- elif "analyze" in "{operation}".lower():
1746
- print("Performing analysis...")
1747
- elif "extract" in "{operation}".lower():
1748
- print("Extracting information...")
1749
-
1750
- print("Task completed - use enhanced_python_execution for specific calculations")
1751
- """
1752
-
1753
- return enhanced_python_execution(code_template)
1754
-
1755
- except Exception as e:
1756
- error_msg = f"Data processing error: {e}"
1757
- logging.error(error_msg)
1758
- return error_msg
1759
-
1760
- # --- Model and Agent Setup ---
1761
-
1762
- try:
1763
- # Use a more capable model for better performance
1764
  model = InferenceClientModel(
1765
- model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo", # Upgraded model
1766
  token=api_keys['together'],
1767
  provider="together"
1768
  )
1769
- logging.info("βœ… Model loaded successfully")
1770
- except Exception as e:
1771
- logging.error(f"Failed to load primary model, falling back: {e}")
1772
- try:
1773
- # Fallback model
1774
- model = InferenceClientModel(
1775
- model_id="Qwen/Qwen2.5-7B-Instruct",
1776
- token=api_keys['together'],
1777
- provider="together"
1778
- )
1779
- logging.info("βœ… Fallback model loaded successfully")
1780
- except Exception as e2:
1781
- logging.error(f"Failed to load fallback model: {e2}")
1782
- raise
1783
-
1784
- # Configure Google Search tool
1785
- google_search_tool = None
1786
- if api_keys['serpapi']:
1787
- try:
1788
- google_search_tool = GoogleSearchTool(
1789
- provider='serpapi',
1790
- serpapi_api_key=api_keys['serpapi']
1791
- )
1792
- logging.info("βœ… Google Search tool configured")
1793
- except Exception as e:
1794
- logging.warning(f"Failed to configure Google Search tool: {e}")
1795
-
1796
- # Prepare tools list
1797
- tools_list = [
1798
- enhanced_wikipedia_search,
1799
- advanced_web_query,
1800
- enhanced_youtube_query,
1801
- enhanced_python_execution,
1802
- data_processing_tool,
1803
- ]
1804
-
1805
- if google_search_tool:
1806
- tools_list.insert(0, google_search_tool)
1807
 
1808
- # Specialized worker agent with comprehensive toolset
1809
- worker_agent = ToolCallingAgent(
1810
- tools=tools_list,
1811
- model=model,
1812
- max_steps=8, # Increased for complex tasks
1813
- name="gaia_specialist",
1814
- description="Advanced specialist agent for GAIA benchmark: web research, document analysis, video processing, mathematical computation, and data analysis."
1815
- )
1816
 
1817
- # Enhanced strategic manager agent
1818
- manager_tools = []
1819
- if google_search_tool:
1820
- manager_tools.append(google_search_tool)
1821
 
1822
  manager = CodeAgent(
1823
  model=model,
1824
- managed_agents=[worker_agent],
1825
- tools=manager_tools,
1826
- instructions="""You are a general AI assistant designed for the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions that require deep reasoning and analysis.
1827
-
1828
- **CRITICAL: ANSWER FORMAT REQUIREMENT**
1829
- You MUST finish your response with: FINAL ANSWER: [YOUR FINAL ANSWER]
1830
-
1831
- YOUR FINAL ANSWER formatting rules:
1832
- - For NUMBERS: No commas, no units (like $ or %), no additional text
1833
- Example: "FINAL ANSWER: 42" NOT "FINAL ANSWER: 42 dollars" or "FINAL ANSWER: $42"
1834
- - For STRINGS: No articles (a, an, the), no abbreviations, write digits in plain text
1835
- Example: "FINAL ANSWER: New York City" NOT "FINAL ANSWER: NYC" or "FINAL ANSWER: The Big Apple"
1836
- - For LISTS: Comma-separated, apply above rules to each element
1837
- Example: "FINAL ANSWER: Paris, London, Berlin" or "FINAL ANSWER: 1.5, 2.3, 4.7"
1838
-
1839
- **STRATEGIC APPROACH:**
1840
-
1841
- 1. **ANALYZE THE QUESTION**: Determine what type of answer is expected (number, string, or list)
1842
-
1843
- 2. **DECOMPOSE THE PROBLEM**: Break complex questions into sub-problems:
1844
- - Identify required information sources
1845
- - Plan tool usage sequence
1846
- - Consider verification steps
1847
-
1848
- 3. **TOOL SELECTION**:
1849
- - Use GoogleSearchTool for current information and general web queries
1850
- - Delegate to gaia_specialist for complex multi-tool analysis:
1851
- * advanced_web_query: Deep webpage content analysis
1852
- * enhanced_youtube_query: Video transcript analysis
1853
- * enhanced_python_execution: Mathematical calculations and data processing
1854
- * enhanced_wikipedia_search: Encyclopedic knowledge
1855
- * data_processing_tool: Structured data analysis
1856
-
1857
- 4. **VERIFICATION**: Cross-check critical information and validate calculations
1858
-
1859
- **DELEGATION EXAMPLES**:
1860
-
1861
- Simple queries:
1862
- ```python
1863
- # Direct search for current information
1864
- result = search_tool.run("population Tokyo 2024")
1865
- # Extract and format the answer properly
1866
- ```
1867
-
1868
- Complex analysis:
1869
- ```python
1870
- # Delegate comprehensive tasks to specialist
1871
- answer = gaia_specialist.run('''
1872
- Find the founding year of the company mentioned in this video: [URL],
1873
- calculate years from founding to 2024,
1874
- then identify a major historical event from that founding year.
1875
- Format the final answer according to GAIA requirements.
1876
- ''')
1877
- ```
1878
-
1879
- **RESPONSE STRUCTURE**:
1880
- 1. Show your reasoning and steps
1881
- 2. Use tools to gather information
1882
- 3. Verify your findings
1883
- 4. Format the final answer correctly
1884
- 5. End with "FINAL ANSWER: [answer]"
1885
-
1886
- **EXAMPLES OF PROPER FORMATTING**:
1887
- - Question asks for a year: "FINAL ANSWER: 1991"
1888
- - Question asks for a city: "FINAL ANSWER: San Francisco"
1889
- - Question asks for a percentage: "FINAL ANSWER: 25" (not "25%" unless specified)
1890
- - Question asks for a list of countries: "FINAL ANSWER: France, Germany, Italy"
1891
- - Question asks for a calculation result: "FINAL ANSWER: 456"
1892
 
1893
- Remember: Be methodical, verify your information, and always end with the properly formatted FINAL ANSWER."""
 
 
 
 
1894
  )
1895
 
1896
- logging.info("🎯 Enhanced GAIA agent initialized successfully!")
1897
- return manager
 
 
1898
 
1899
  # --- Main Execution Block for Local Testing ---
1900
 
1901
  def main():
1902
- """Test the agent with sample GAIA-style questions."""
1903
  configure_logging()
1904
- logging.info("πŸ§ͺ Starting local testing...")
1905
 
1906
- try:
1907
- agent = initialize_agent()
1908
- if not agent:
1909
- logging.error("Agent initialization failed")
1910
- return
1911
 
1912
- # More challenging test questions similar to GAIA
1913
- test_questions = [
1914
- "What is 15! / (12! * 3!) ?",
1915
- "In what year was the Python programming language first released?",
1916
- "What is the square root of 2,025?",
1917
- ]
 
 
 
1918
 
1919
- for i, question in enumerate(test_questions, 1):
1920
- logging.info(f"\n{'='*60}")
1921
- logging.info(f"πŸ” Test Question {i}: {question}")
1922
- logging.info('='*60)
1923
-
1924
- start_time = time.time()
1925
- try:
1926
- response = agent.run(question)
1927
- elapsed_time = time.time() - start_time
1928
-
1929
- logging.info(f"βœ… Agent Answer: {response}")
1930
- logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
1931
-
1932
- except Exception as e:
1933
- logging.error(f"❌ Error processing question {i}: {e}")
1934
-
1935
- time.sleep(2) # Prevent rate limiting
1936
-
1937
- logging.info(f"\n{'='*60}")
1938
- logging.info("🏁 Testing completed!")
1939
- logging.info('='*60)
1940
 
1941
- except Exception as e:
1942
- logging.critical(f"πŸ’₯ Critical error during testing: {e}", exc_info=True)
 
 
 
 
1943
 
1944
  if __name__ == "__main__":
1945
  main()
 
  import json
  from functools import lru_cache, wraps
  from typing import Optional, Dict, Any, List
 
  from dotenv import load_dotenv
  from requests.exceptions import RequestException
  import serpapi
  from llama_index.core import VectorStoreIndex, download_loader
  from llama_index.core.schema import Document
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
  # --- Correctly import the specific tools from smolagents ---
  from smolagents import (
 
      datefmt="%Y-%m-%d %H:%M:%S"
  )
 
+ def load_api_keys() -> Dict[str, Optional[str]]:
+     """Loads API keys from environment variables."""
      load_dotenv()
      keys = {
          'together': os.getenv('TOGETHER_API_KEY'),
          'serpapi': os.getenv('SERPAPI_API_KEY'),
      }
      for key_name, key_value in keys.items():
          if key_value:
              logging.info(f"βœ… {key_name.upper()} API key loaded")
          else:
              logging.warning(f"⚠️ {key_name.upper()} API key not found")
 
      if not keys['together']:
+         raise ValueError("TOGETHER_API_KEY is required but not found.")
      return keys
 
  # --- Custom Exceptions ---
+ class SerpApiClientException(Exception): pass
+ class YouTubeTranscriptApiError(Exception): pass
 
  # --- Enhanced Decorators ---
 
  def retry(max_retries=3, initial_delay=1, backoff=2):
+     """A robust retry decorator with exponential backoff."""
      def decorator(func):
          @wraps(func)
          def wrapper(*args, **kwargs):
              delay = initial_delay
+             retryable_exceptions = (RequestException, SerpApiClientException, YouTubeTranscriptApiError, TranscriptsDisabled, NoTranscriptFound)
              for attempt in range(1, max_retries + 1):
                  try:
                      return func(*args, **kwargs)
                  except retryable_exceptions as e:
                      if attempt == max_retries:
                          logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
+                         # BUG FIX: Return a descriptive error string instead of raising, which could crash the agent.
+                         return f"Tool Error: {func.__name__} failed after {max_retries} attempts. Details: {e}"
                      logging.warning(f"Attempt {attempt} for {func.__name__} failed: {e}. Retrying in {delay} seconds...")
                      time.sleep(delay)
                      delay *= backoff
                  except Exception as e:
                      logging.error(f"{func.__name__} failed with a non-retryable error: {e}")
+                     return f"Tool Error: A non-retryable error occurred in {func.__name__}: {e}"
          return wrapper
      return decorator
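Because `retry` is a decorator factory (it takes `max_retries`, `initial_delay`, `backoff`), it must always be applied with parentheses, e.g. `@retry()`; a bare `@retry` would pass the decorated function in as `max_retries`. A minimal sketch of the intended behavior, assuming the `retry` defined above (the `flaky_fetch` helper and its failure pattern are illustrative only):

```python
from requests.exceptions import RequestException

calls = {"n": 0}

@retry(max_retries=3, initial_delay=0)
def flaky_fetch() -> str:
    # Fails twice with a retryable error, then succeeds on the third attempt.
    calls["n"] += 1
    if calls["n"] < 3:
        raise RequestException("transient network hiccup")
    return "payload"

print(flaky_fetch())  # "payload" - succeeds on the third attempt
```

A call that fails on every attempt comes back as a `"Tool Error: ..."` string instead of an exception, so the agent loop sees an ordinary tool result it can reason about rather than a crash.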
 
  # --- Enhanced Helper Functions ---
 
  def extract_video_id(url_or_id: str) -> Optional[str]:
+     """Extracts YouTube video ID from various URL formats."""
+     if not url_or_id: return None
      url_or_id = url_or_id.strip()
      if re.match(r'^[a-zA-Z0-9_-]{11}$', url_or_id):
          return url_or_id
      patterns = [
+         r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/|youtube-nocookie\.com/embed/)([a-zA-Z0-9_-]{11})'
      ]
      for pattern in patterns:
          match = re.search(pattern, url_or_id)
          if match:
              return match.group(1)
      return None
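The helper accepts either a bare 11-character ID or any of the URL shapes named in the pattern. A quick sanity check of the expected behavior, using a well-known sample ID:

```python
assert extract_video_id("dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("not a video reference") is None
```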
 
  def clean_text_output(text: str) -> str:
+     """Cleans and normalizes text output."""
+     if not text: return ""
+     text = re.sub(r'\s+', ' ', text).strip()
      return text
 
+ # --- Answer Formatting and Extraction (CRITICAL FOR GAIA) ---
 
  def extract_final_answer(response: str) -> str:
+     """Extracts the final answer from the agent's full response string."""
+     if not response: return ""
+     match = re.search(r'FINAL\s+ANSWER\s*:\s*(.*)', response, re.IGNORECASE | re.DOTALL)
      if match:
+         return match.group(1).strip()
 
+     # Fallback if the pattern is missing
      lines = response.strip().split('\n')
+     return lines[-1].strip()
 
+ def normalize_answer_format(answer: str) -> str:
+     """Normalizes the extracted answer to meet strict GAIA formatting requirements."""
+     if not answer: return ""
 
+     answer = answer.strip().rstrip('.')
 
+     # Auto-detect type
+     is_list = ',' in answer and len(answer.split(',')) > 1
+     is_numeric = False
      try:
+         # BUG FIX: strip separators and currency/percent symbols before the float test,
+         # otherwise values like "$1500" never take the numeric branch and keep their symbols.
+         float(re.sub(r'[,$%]', '', answer))
+         is_numeric = not is_list  # A list of numbers is a list, not a single number
+     except ValueError:
+         is_numeric = False
+ 
+     if is_numeric:
+         return re.sub(r'[,$%]', '', answer).strip()
+     elif is_list:
+         elements = [elem.strip() for elem in answer.split(',')]
+         # Recursively normalize each element of the list
+         normalized_elements = [normalize_answer_format(elem) for elem in elements]
+         return ', '.join(normalized_elements)
+     else:  # Is a string
+         # Expand common abbreviations
+         abbreviations = {'NYC': 'New York City', 'LA': 'Los Angeles', 'SF': 'San Francisco'}
+         return abbreviations.get(answer.upper(), answer)
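Together these helpers form the submission pipeline: `extract_final_answer` pulls out whatever follows the `FINAL ANSWER:` marker, and `normalize_answer_format` applies the GAIA surface rules. A sketch of the expected round trips (sample strings are illustrative):

```python
raw = "Reasoning steps go here.\nFINAL ANSWER: $1500"

assert extract_final_answer(raw) == "$1500"
assert normalize_answer_format("$1500") == "1500"            # currency symbol stripped from a numeric answer
assert normalize_answer_format("Paris, London, Berlin") == "Paris, London, Berlin"  # lists pass through element-wise
assert normalize_answer_format("NYC") == "New York City"     # known abbreviation expanded
```

One caveat of the auto-detection: a number written with thousands separators, such as "1,500", is classified as a list because of the comma, so answers should be produced without separators in the first place.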
 
+ # --- Agent Wrapper for GAIA Compliance ---
 
+ def create_gaia_agent_wrapper(agent: CodeAgent):
+     """
+     Creates a callable wrapper around the agent to enforce GAIA answer formatting.
+     This is a key component for ensuring the final output is compliant.
+     """
+     def gaia_compliant_agent(question: str) -> str:
+         logging.info(f"Received question for GAIA compliant agent: '{question}'")
+         full_response = agent.run(question)
+         logging.info(f"Agent raw response:\n---\n{full_response}\n---")
 
+         final_answer = extract_final_answer(full_response)
+         normalized_answer = normalize_answer_format(final_answer)
 
+         logging.info(f"Extracted final answer: '{final_answer}'")
+         logging.info(f"Normalized answer for submission: '{normalized_answer}'")
 
+         return normalized_answer
+     return gaia_compliant_agent
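The wrapper turns the agent into a plain `question -> answer` callable. A sketch with a stub standing in for a real `CodeAgent` (the `StubAgent` class is illustrative only; the type hint is not enforced at runtime):

```python
class StubAgent:
    # Stands in for a smolagents CodeAgent: run() returns the full reasoning trace.
    def run(self, question: str) -> str:
        return "Reasoning...\nFINAL ANSWER: 42"

submit = create_gaia_agent_wrapper(StubAgent())
print(submit("What is 6 * 7?"))  # -> "42", extracted and normalized
```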
 
+ # --- Main Agent Initialization ---
 
  def initialize_agent():
+     """Initializes the enhanced multi-disciplinary agent for the GAIA benchmark."""
      configure_logging()
      logging.info("πŸš€ Starting GAIA agent initialization...")
 
      try:
          api_keys = load_api_keys()
+     except ValueError as e:
+         logging.error(f"FATAL: {e}")
          return None
 
+     # --- Tool Definitions ---
 
+     @lru_cache(maxsize=64)
+     @retry()  # BUG FIX: retry is a decorator factory, so it must be called with parentheses
+     def get_webpage_index(url: str) -> VectorStoreIndex:
+         logging.info(f"πŸ“„ Indexing webpage: {url}")
+         loader = download_loader("BeautifulSoupWebReader")()
+         docs = loader.load_data(urls=[url])
+         if not docs or not any(len(doc.text.strip()) > 50 for doc in docs):
+             raise ValueError(f"No substantial content found in {url}")
+         return VectorStoreIndex.from_documents(docs)
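Decorator order matters here: `lru_cache` wraps the retried function, so a URL that has already been indexed is served from cache and the retry loop never re-runs. One side effect worth knowing is that when `retry` converts a terminal failure into a `"Tool Error: ..."` string, that string is cached for the URL as well. A minimal sketch of the ordering effect (names illustrative):

```python
from functools import lru_cache

hits = {"n": 0}

@lru_cache(maxsize=8)
@retry(max_retries=2, initial_delay=0)
def fetch_index(url: str) -> str:
    hits["n"] += 1
    return f"indexed {url}"

fetch_index("https://example.com")
fetch_index("https://example.com")
print(hits["n"])  # 1 - the second call is a cache hit, so no fetch and no retries
```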
 
      @tool
      def enhanced_python_execution(code: str) -> str:
+         """Executes Python code in a restricted environment and returns the output."""
+         logging.info(f"🐍 Executing Python code: {code[:200]}...")
          stdout_capture = io.StringIO()
          try:
+             # ENHANCEMENT: Restrict built-ins for better security
+             safe_globals = {
+                 "requests": __import__("requests"), "pd": __import__("pandas"), "np": __import__("numpy"),
+                 "datetime": __import__("datetime"), "math": __import__("math"), "re": __import__("re"),
+                 "json": __import__("json"), "collections": __import__("collections")
+             }
+             restricted_builtins = {
+                 'print': print, 'len': len, 'range': range, 'str': str, 'int': int, 'float': float,
+                 'list': list, 'dict': dict, 'set': set, 'tuple': tuple, 'max': max, 'min': min, 'sum': sum,
+                 'sorted': sorted, 'round': round
+             }
+             with contextlib.redirect_stdout(stdout_capture):
                  exec(code, {"__builtins__": restricted_builtins}, safe_globals)
 
+             result = stdout_capture.getvalue().strip()
+             return result if result else "Code executed successfully with no output."
          except Exception as e:
              error_msg = f"Code execution error: {e}"
              logging.error(error_msg)
              return error_msg
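The two-mapping `exec` call is the sandboxing trick: the globals dict exposes only the whitelisted built-ins, while the pre-imported modules (`math`, `pd`, `np`, and so on) arrive through the locals mapping. Submitted snippets can therefore compute and `print`, but `import`, `open`, and `eval` are simply absent. A sketch of a typical call, mirroring the first local test question below (calling the tool as a plain function assumes the smolagents `@tool` wrapper stays directly callable):

```python
snippet = "print(math.factorial(15) // (math.factorial(12) * math.factorial(3)))"
print(enhanced_python_execution(snippet))  # -> "455", captured from the snippet's stdout
```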
 
      # --- Model and Agent Setup ---
 
      try:
          model = InferenceClientModel(
+             model_id="meta-llama/Llama-3.1-70B-Instruct-Turbo",
              token=api_keys['together'],
              provider="together"
          )
+         logging.info("βœ… Primary model (Llama 3.1 70B) loaded successfully")
      except Exception as e:
+         logging.warning(f"⚠️ Failed to load primary model, falling back. Error: {e}")
          model = InferenceClientModel(
+             model_id="Qwen/Qwen2.5-7B-Instruct",
              token=api_keys['together'],
              provider="together"
          )
+         logging.info("βœ… Fallback model (Qwen 2.5 7B) loaded successfully")
 
+     google_search_tool = GoogleSearchTool(provider='serpapi', serpapi_api_key=api_keys['serpapi']) if api_keys['serpapi'] else None
 
+     tools_list = [tool for tool in [google_search_tool, enhanced_python_execution] if tool]
 
      manager = CodeAgent(
          model=model,
+         tools=tools_list,
+         instructions="""You are a master AI assistant for the GAIA benchmark. Your goal is to provide a single, precise, and final answer.
+ 
+ **STRATEGY:**
+ 1. **Analyze**: Break down the user's question into steps.
+ 2. **Execute**: Use the provided tools (`GoogleSearchTool`, `enhanced_python_execution`) to find the information or perform calculations.
+ 3. **Synthesize**: Combine the results of your tool use to form a final answer.
+ 4. **Format**: Present your final answer clearly at the end of your response, prefixed with `FINAL ANSWER:`.
+ 
+ **CRITICAL INSTRUCTION:** You MUST end your entire response with the line `FINAL ANSWER: [Your Final Answer]`. The text that follows this prefix is what will be submitted. Adhere to strict formatting: no extra words, no currency symbols, no commas in numbers.
+ - For "What is 2*21?": `FINAL ANSWER: 42`
+ - For "Capital of France?": `FINAL ANSWER: Paris`
+ - For "What are the first three even numbers?": `FINAL ANSWER: 2, 4, 6`
+ """
      )
 
+     logging.info("🎯 GAIA agent initialized successfully!")
+ 
+     # BUG FIX: Return the wrapped, compliant agent instead of the raw manager.
+     return create_gaia_agent_wrapper(manager)
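Returning the wrapper changes the call contract: callers no longer invoke `manager.run(question)` and parse the trace themselves, they call the returned function directly. A hypothetical call site:

```python
agent = initialize_agent()
if agent:
    answer = agent("What is the square root of 2025?")
    print(answer)  # expected "45", already extracted and normalized
```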
 
  # --- Main Execution Block for Local Testing ---
 
  def main():
+     """Tests the agent with sample GAIA-style questions."""
      configure_logging()
+     logging.info("πŸ§ͺ Starting local agent testing...")
 
+     agent = initialize_agent()
+     if not agent:
+         logging.critical("πŸ’₯ Agent initialization failed. Exiting.")
+         return
 
+     test_questions = [
+         "What is 15! / (12! * 3!)?",
+         "In what year was the Python programming language first released?",
+         "What is the square root of 2025?",
+     ]
+ 
+     for i, question in enumerate(test_questions, 1):
+         logging.info(f"\n{'='*60}\nπŸ” Test Question {i}: {question}\n{'='*60}")
+         start_time = time.time()
 
+         # BUG FIX: Call the agent wrapper directly, not agent.run()
+         final_answer = agent(question)
 
+         elapsed_time = time.time() - start_time
+         logging.info(f"βœ… Submitted Answer: {final_answer}")
+         logging.info(f"⏱️ Execution time: {elapsed_time:.2f} seconds")
+         time.sleep(1)
+ 
+     logging.info(f"\n{'='*60}\n🏁 Testing complete!\n{'='*60}")
 
  if __name__ == "__main__":
      main()
requirements.txt CHANGED
@@ -17,3 +17,4 @@ requests
  llama-index
  beautifulsoup4
  lxml
+ youtube-transcript-api