Update agent.py
agent.py CHANGED
@@ -124,38 +124,25 @@ All answers are graded by exact string match, so format carefully!"""
     def __call__(self, prompt: str, **kwargs) -> str:
         """
         Call the model with appropriate handling of system prompts for Anthropic
-
-        Args:
-            prompt: The prompt to send to the model
-            **kwargs: Additional arguments to pass to LiteLLM
-
-        Returns:
-            The model's response as a string
         """
         # Wait according to rate limiter
         RATE_LIMITER.wait()
 
         try:
-            #
-            # We do this by using the 'messages' parameter directly with the system content
-
-            # Extract system_instruction from kwargs if it exists and remove it
-            # (to avoid the "Extra inputs are not permitted" error)
+            # Remove system_instruction if present in kwargs
             if 'system_instruction' in kwargs:
-                # We'll ignore it and use our stored system prompt instead
                 del kwargs['system_instruction']
 
-            #
-            messages = [
-                {"role": "system", "content": self.system_prompt},
-                {"role": "user", "content": prompt}
-            ]
-
-            # Call LiteLLM with the proper message format for Anthropic
+            # For Anthropic via LiteLLM, use the direct completion method
            from litellm import completion
+
+            # Create a simple prompt with system instructions at the beginning
+            # This avoids the nested message structure issue
+            complete_prompt = f"{self.system_prompt}\n\n{prompt}"
+
             response = completion(
                 model=self.model_id,
-                messages=messages,
+                messages=[{"role": "user", "content": complete_prompt}],
                 api_key=self.api_key,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens,
@@ -166,15 +153,13 @@ All answers are graded by exact string match, so format carefully!"""
             return response.choices[0].message.content
 
         except Exception as e:
+            # Handle rate limit errors
             if "rate_limit" in str(e).lower():
-                # Specific handling for rate limit errors
                 print(f"Rate limit error: {e}")
                 print("Waiting 60 seconds before retrying...")
                 time.sleep(60)
-                # Recursive retry after waiting
                 return self.__call__(prompt, **kwargs)
             else:
-                # Re-raise other errors
                 print(f"Error calling Anthropic API: {e}")
                 raise
 
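The substance of the change: instead of sending the system prompt as a separate {"role": "system", ...} message, the new code prepends it to the user prompt and sends everything as a single user message. A minimal standalone sketch of the new call shape, assuming litellm is installed and ANTHROPIC_API_KEY is set in the environment; the model id and prompts below are placeholders, not values from this repo:

from litellm import completion

# Placeholder values -- the class in agent.py reads these from self.*
MODEL_ID = "anthropic/claude-3-opus-20240229"  # hypothetical model id
SYSTEM_PROMPT = "All answers are graded by exact string match, so format carefully!"
USER_PROMPT = "What is the capital of France? Answer with a single word."

# Fold the system prompt into one user message, as the new code does.
complete_prompt = f"{SYSTEM_PROMPT}\n\n{USER_PROMPT}"

response = completion(
    model=MODEL_ID,
    messages=[{"role": "user", "content": complete_prompt}],
    temperature=0.0,
    max_tokens=256,
)
print(response.choices[0].message.content)

Flattening gives up the separate system slot that Anthropic's API provides, but it sidesteps the message-structure error this commit describes, and the call site only ever has to build one messages list.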
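RATE_LIMITER.wait() guards every request, but its definition sits outside this hunk. For illustration only, here is a minimal fixed-interval limiter that would satisfy this call site; the RateLimiter class and the one-request-per-interval policy are assumptions, not the repo's actual implementation:

import threading
import time

class RateLimiter:
    """Minimal fixed-interval limiter: at most one call per min_interval seconds."""

    def __init__(self, min_interval: float = 1.0):
        self.min_interval = min_interval
        self._lock = threading.Lock()
        self._last_call = 0.0

    def wait(self) -> None:
        # Block until at least min_interval has passed since the previous call.
        with self._lock:
            now = time.monotonic()
            remaining = self._last_call + self.min_interval - now
            if remaining > 0:
                time.sleep(remaining)
            self._last_call = time.monotonic()

RATE_LIMITER = RateLimiter(min_interval=1.0)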
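One thing the diff leaves unchanged: the rate-limit branch retries by calling self.__call__ recursively after a fixed 60-second wait, so a persistently throttled key retries forever and deepens the stack on every attempt. A bounded-retry variant with exponential backoff, sketched as a hypothetical standalone helper rather than as part of this commit:

import time

def call_with_retries(call, prompt, max_retries: int = 5, base_delay: float = 60.0):
    """Bounded retry wrapper around a model call.

    A variant of the recursive retry in the diff: caps the number of
    attempts so a persistent rate limit cannot retry indefinitely.
    `call` is any callable that takes a prompt and returns a string.
    """
    for attempt in range(max_retries):
        try:
            return call(prompt)
        except Exception as e:
            # Re-raise non-rate-limit errors and the final failed attempt.
            if "rate_limit" not in str(e).lower() or attempt == max_retries - 1:
                raise
            delay = base_delay * (2 ** attempt)  # exponential backoff
            print(f"Rate limit hit (attempt {attempt + 1}/{max_retries}); "
                  f"sleeping {delay:.0f}s")
            time.sleep(delay)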