Spaces:

real-jiakai
/

Agent_Course_Final_Assignment_Demo

Paused

App Files Files Community

real-jiakai committed on Apr 26

Commit

fa599aa

verified ·

1 Parent(s): 1320d0a

Update agent.py

Browse files

Files changed (1) hide show

agent.py +112 -94

agent.py CHANGED Viewed

@@ -25,7 +25,6 @@ from smolagents import (
     CodeAgent,
     DuckDuckGoSearchTool,
     PythonInterpreterTool,
-    LiteLLMModel,
     tool,
 )
@@ -77,6 +76,108 @@ class RateLimiter:
 # Global rate limiter instance
 RATE_LIMITER = RateLimiter(requests_per_minute=25)  # Keep below 40 for safety
 # --------------------------------------------------------------------------- #
 # custom tool: fetch GAIA attachments
 # --------------------------------------------------------------------------- #
@@ -259,90 +360,6 @@ def analyze_excel_file(file_path: str, query: str) -> str:
     except Exception as e:
         return f"Error analyzing Excel file: {str(e)}"
-# --------------------------------------------------------------------------- #
-# Custom LiteLLM model with rate limiting and error handling
-# --------------------------------------------------------------------------- #
-# --------------------------------------------------------------------------- #
-# Custom LiteLLM model with rate limiting and error handling
-# --------------------------------------------------------------------------- #
-class RateLimitedClaudeModel:
-    """
-    Callable wrapper around a smolagents LiteLLMModel for Claude.
-    Each call first waits on the module-level RATE_LIMITER, then invokes the
-    wrapped model; failures whose message contains "rate_limit_error" are
-    retried with exponential backoff, everything else is re-raised.
-    """
-    def __init__(
-        self,
-        model_id: str = "anthropic/claude-3-5-sonnet-20240620",
-        api_key: Optional[str] = None,
-        temperature: float = 0.1,
-        max_tokens: int = 1024,
-        max_retries: int = 3,
-        retry_delay: int = 5,
-    ):
-        """
-        Initialize a Claude model with rate limiting and error handling
-        Args:
-            model_id: The model ID to use
-            api_key: The API key to use; when None, read from the
-                ANTHROPIC_API_KEY environment variable
-            temperature: The temperature to use
-            max_tokens: The maximum number of tokens to generate
-            max_retries: The maximum number of retries on rate limit errors
-            retry_delay: The initial delay in seconds between retries
-                (doubles after each retry)
-        Raises:
-            ValueError: If api_key is None and ANTHROPIC_API_KEY is unset or empty
-        """
-        # Get API key
-        if api_key is None:
-            api_key = os.getenv("ANTHROPIC_API_KEY")
-            if not api_key:
-                raise ValueError("No Anthropic token provided. Please set ANTHROPIC_API_KEY environment variable or pass api_key parameter.")
-        self.model_id = model_id
-        self.api_key = api_key
-        self.temperature = temperature
-        # NOTE(review): max_tokens is stored here but is not forwarded to the
-        # LiteLLMModel constructed below.
-        self.max_tokens = max_tokens
-        self.max_retries = max_retries
-        self.retry_delay = retry_delay
-        # Create the underlying LiteLLM model
-        self.model = LiteLLMModel(
-            model_id=model_id,
-            api_key=api_key,
-            temperature=temperature
-        )
-    def __call__(self, prompt: str, **kwargs) -> str:
-        """
-        Call the model with rate limiting and error handling
-        Args:
-            prompt: The prompt to generate from
-            **kwargs: Extra keyword arguments forwarded to the wrapped model;
-                a default "system_instruction" is added when the caller
-                supplies none
-        Returns:
-            The generated text
-        Raises:
-            Exception: Whatever the wrapped model raised, when the error is
-                not a rate limit error or max_retries has been exhausted
-        """
-        # Make sure system_instruction is always present
-        # NOTE(review): this key is passed straight through to the wrapped model;
-        # presumably this is what the Anthropic API rejects as an extra input - confirm.
-        if "system_instruction" not in kwargs:
-            system_instruction = """You are a concise, highly accurate assistant specialized in solving challenges.
-Your answers should be precise, direct, and exactly match the expected format.
-All answers are graded by exact string match, so format carefully!"""
-            kwargs["system_instruction"] = system_instruction
-        retries = 0
-        while True:
-            try:
-                # Wait according to rate limiter
-                RATE_LIMITER.wait()
-                # Call the model
-                return self.model(prompt, **kwargs)
-            except Exception as e:
-                # Check if it's a rate limit error
-                # (detected by substring match on the exception message)
-                if "rate_limit_error" in str(e) and retries < self.max_retries:
-                    retries += 1
-                    # With the defaults this sleeps 5, 10, then 20 seconds
-                    sleep_time = self.retry_delay * (2 ** (retries - 1))  # Exponential backoff
-                    print(f"Rate limit exceeded, retrying in {sleep_time} seconds (attempt {retries}/{self.max_retries})...")
-                    time.sleep(sleep_time)
-                else:
-                    # If it's not a rate limit error or we've exceeded max retries, raise
-                    # (bare raise keeps the original exception and traceback)
-                    raise
 # --------------------------------------------------------------------------- #
 # GAIAAgent class
 # --------------------------------------------------------------------------- #
@@ -352,7 +369,6 @@ class GAIAAgent:
         api_key: Optional[str] = None,
         temperature: float = 0.1,
         verbose: bool = False,
-        system_prompt: Optional[str] = None,
         max_tokens: int = 1024,
     ):
         """
@@ -362,12 +378,13 @@ class GAIAAgent:
             api_key: Anthropic API key (fetched from environment if not provided)
             temperature: Temperature for text generation
             verbose: Enable verbose logging
-            system_prompt: Custom system prompt (optional)
             max_tokens: Maximum number of tokens to generate per response
         """
         # Set verbosity
         self.verbose = verbose
-        self.system_prompt = system_prompt or """You are a concise, highly accurate assistant specialized in solving challenges for the GAIA benchmark.
 Unless explicitly required, reply with ONE short sentence.
 Your answers should be precise, direct, and exactly match the expected format.
 All answers are graded by exact string match, so format carefully!"""
@@ -376,21 +393,22 @@ All answers are graded by exact string match, so format carefully!"""
         if api_key is None:
             api_key = os.getenv("ANTHROPIC_API_KEY")
             if not api_key:
-                raise ValueError("No Anthropic token provided. Please set ANTHROPIC_API_KEY environment variable or pass api_key parameter.")
         if self.verbose:
             print(f"Using Anthropic token: {api_key[:5]}...")
-        # Initialize Claude model with rate limiting
-        self.model = RateLimitedClaudeModel(
             model_id="anthropic/claude-3-5-sonnet-20240620",  # Use Claude 3.5 Sonnet
             api_key=api_key,
             temperature=temperature,
             max_tokens=max_tokens,
         )
         if self.verbose:
-            print(f"Initialized model: RateLimitedClaudeModel - anthropic/claude-3-5-sonnet-20240620")
         # Initialize default tools
         self.tools = [
@@ -457,7 +475,7 @@ All answers are graded by exact string match, so format carefully!"""
             if task_file_path:
                 try:
                     # Limit file content size to avoid token limits
-                    max_file_size = 10000  # Characters
                     with open(task_file_path, 'r', errors='ignore') as f:
                         file_content = f.read(max_file_size)
                         if len(file_content) >= max_file_size:
@@ -594,7 +612,7 @@ Example: If asked "What is the capital of France?", respond just with "Paris".
         return answer
 # --------------------------------------------------------------------------- #
-# GeminiAgent class - Wrapper around GAIAAgent
 # --------------------------------------------------------------------------- #
 class ClaudeAgent:
     """Claude-enhanced agent for GAIA challenge"""

     CodeAgent,
     DuckDuckGoSearchTool,
     PythonInterpreterTool,
     tool,
 )
 # Global rate limiter instance
 RATE_LIMITER = RateLimiter(requests_per_minute=25)  # Keep below 40 for safety
+# --------------------------------------------------------------------------- #
+# Fixed LiteLLM model for Anthropic
+# --------------------------------------------------------------------------- #
+class FixedAnthropicModel:
+    """
+    A wrapper around LiteLLM that properly handles Anthropic API calls
+    and avoids the "system_instruction: Extra inputs are not permitted" error
+    """
+    def __init__(
+        self,
+        model_id: str = "anthropic/claude-3-5-sonnet-20240620",
+        api_key: Optional[str] = None,
+        temperature: float = 0.1,
+        max_tokens: int = 1024,
+        system_prompt: Optional[str] = None,
+    ):
+        """
+        Initialize a model that properly handles system prompts for Anthropic via LiteLLM
+        Args:
+            model_id: Claude model ID to use
+            api_key: API key (will use ANTHROPIC_API_KEY env var if not provided)
+            temperature: Temperature for text generation
+            max_tokens: Maximum tokens to generate
+            system_prompt: System prompt to use
+        """
+        # Get API key from env if not provided
+        if api_key is None:
+            api_key = os.getenv("ANTHROPIC_API_KEY")
+            if not api_key:
+                raise ValueError("No Anthropic API key provided. Set ANTHROPIC_API_KEY env var.")
+        self.model_id = model_id
+        self.api_key = api_key
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        # Store the system prompt
+        self.system_prompt = system_prompt or """You are a concise, highly accurate assistant specialized in solving challenges.
+Your answers should be precise, direct, and exactly match the expected format.
+All answers are graded by exact string match, so format carefully!"""
+        print(f"Initialized FixedAnthropicModel with {model_id}")
+    def __call__(self, prompt: str, **kwargs) -> str:
+        """
+        Call the model with appropriate handling of system prompts for Anthropic
+        Args:
+            prompt: The prompt to send to the model
+            **kwargs: Additional arguments to pass to LiteLLM
+        Returns:
+            The model's response as a string
+        """
+        # Wait according to rate limiter
+        RATE_LIMITER.wait()
+        try:
+            # For Anthropic models, we need to modify how system prompts are handled
+            # We do this by using the 'messages' parameter directly with the system content
+            # Extract system_instruction from kwargs if it exists and remove it
+            # (to avoid the "Extra inputs are not permitted" error)
+            if 'system_instruction' in kwargs:
+                # We'll ignore it and use our stored system prompt instead
+                del kwargs['system_instruction']
+            # Create our messages array with the system message and user prompt
+            messages = [
+                {"role": "system", "content": self.system_prompt},
+                {"role": "user", "content": prompt}
+            ]
+            # Call LiteLLM with the proper message format for Anthropic
+            from litellm import completion
+            response = completion(
+                model=self.model_id,
+                messages=messages,
+                api_key=self.api_key,
+                temperature=self.temperature,
+                max_tokens=self.max_tokens,
+                **kwargs
+            )
+            # Extract the content from the response
+            return response.choices[0].message.content
+        except Exception as e:
+            if "rate_limit" in str(e).lower():
+                # Specific handling for rate limit errors
+                print(f"Rate limit error: {e}")
+                print("Waiting 60 seconds before retrying...")
+                time.sleep(60)
+                # Recursive retry after waiting
+                return self.__call__(prompt, **kwargs)
+            else:
+                # Re-raise other errors
+                print(f"Error calling Anthropic API: {e}")
+                raise
 # --------------------------------------------------------------------------- #
 # custom tool: fetch GAIA attachments
 # --------------------------------------------------------------------------- #
     except Exception as e:
         return f"Error analyzing Excel file: {str(e)}"
 # --------------------------------------------------------------------------- #
 # GAIAAgent class
 # --------------------------------------------------------------------------- #
         api_key: Optional[str] = None,
         temperature: float = 0.1,
         verbose: bool = False,
         max_tokens: int = 1024,
     ):
         """
             api_key: Anthropic API key (fetched from environment if not provided)
             temperature: Temperature for text generation
             verbose: Enable verbose logging
             max_tokens: Maximum number of tokens to generate per response
         """
         # Set verbosity
         self.verbose = verbose
+        # System prompt for all Claude interactions
+        self.system_prompt = """You are a concise, highly accurate assistant specialized in solving challenges for the GAIA benchmark.
 Unless explicitly required, reply with ONE short sentence.
 Your answers should be precise, direct, and exactly match the expected format.
 All answers are graded by exact string match, so format carefully!"""
         if api_key is None:
             api_key = os.getenv("ANTHROPIC_API_KEY")
             if not api_key:
+                raise ValueError("No Anthropic token provided. Please set ANTHROPIC_API_KEY environment variable.")
         if self.verbose:
             print(f"Using Anthropic token: {api_key[:5]}...")
+        # Initialize Claude model with our fixed wrapper
+        self.model = FixedAnthropicModel(
             model_id="anthropic/claude-3-5-sonnet-20240620",  # Use Claude 3.5 Sonnet
             api_key=api_key,
             temperature=temperature,
             max_tokens=max_tokens,
+            system_prompt=self.system_prompt,
         )
         if self.verbose:
+            print(f"Initialized model: FixedAnthropicModel - claude-3-5-sonnet-20240620")
         # Initialize default tools
         self.tools = [
             if task_file_path:
                 try:
                     # Limit file content size to avoid token limits
+                    max_file_size = 8000  # Characters - reduced further to help with token limits
                     with open(task_file_path, 'r', errors='ignore') as f:
                         file_content = f.read(max_file_size)
                         if len(file_content) >= max_file_size:
         return answer
 # --------------------------------------------------------------------------- #
+# ClaudeAgent class - Wrapper around GAIAAgent
 # --------------------------------------------------------------------------- #
 class ClaudeAgent:
     """Claude-enhanced agent for GAIA challenge"""