Kai Jennissen committed: added tools
agent.py CHANGED

@@ -3,7 +3,7 @@ from smolagents import (
     CodeAgent,
     DuckDuckGoSearchTool,
     VisitWebpageTool,
-
+    InferenceClientModel,
     OpenAIServerModel,
     WikipediaSearchTool,
 )
@@ -79,12 +79,26 @@ if Text('Accept cookies?').exists():
 ```<end_code>
 """
 
-add_sys_prompt = """\n\
-
-
-
-
-
+add_sys_prompt = """\n\nWhen processing tasks with files:
+
+1. Use the fetch_task_files tool with the URL provided to you to download and process files
+2. Depending on the file type returned, use the appropriate specialized tool:
+   - For images: Use the data_url returned with read_image tool
+   - For audio: Use the audio data with transcribe_audio tool
+   - For code files: Use read_code tool
+   - For videos: Use run_video tool
+
+3. When handling different file types:
+   - Images: The fetch_task_files tool will return a data_url you can use directly with read_image
+   - Code: Do not execute code files, analyze them as text
+   - Tabular data (CSV, Excel): Use pandas to analyze the data
+   - Videos: Extract relevant information from visual frames and audio
+
+4. Keep answers concise and to the point. The answer is likely as simple as one word.
+5. Make sure you provide the answer in accordance with the instruction provided in the question.
+6. Do not return the raw result of tool calls as your final answer.
+7. Do not add any additional information, explanation, unnecessary words or symbols.
+"""
 
 
 def initialize_tracing(enabled=True, provider="langfuse"):
@@ -134,19 +148,24 @@ def get_agent():
         description="A web agent that can search the web and visit webpages.",
         verbosity_level=1,
     )
+
     mm_agent = CodeAgent(
         tools=[
+            fetch_task_files,
             read_image,
             transcribe_audio,
             read_code,
             run_video,
         ],
-        model=
+        model=InferenceClientModel(
+            model_id="Qwen/Qwen2.5-VL-32B-Instruct",  # provider="together"
+        ),
         max_steps=3,
         name="Multimedia_Agent",
-        description="An agent that can 
+        description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
         verbosity_level=1,
     )
+    mm_agent.prompt_templates["system_prompt"] += add_sys_prompt
 
     # Initialize the model
     # vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
@@ -168,16 +187,15 @@ def get_agent():
     # Import helium for the agent
     # Create manager agent
     manager_agent = CodeAgent(
-        tools=[
-        managed_agents=[
+        tools=[],
+        managed_agents=[mm_agent, web_agent],
         model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
         max_steps=5,
         planning_interval=10,
         additional_authorized_imports=["pandas", "numpy"],
-        verbosity_level=
+        verbosity_level=2,
    )
 
-    manager_agent.prompt_templates["system_prompt"] += add_sys_prompt
     return manager_agent
 
 
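For context, a minimal sketch of how the reworked agent hierarchy from this diff could be driven end to end. It assumes agent.py exposes get_agent() as shown above and that credentials for gpt-4.1 and the Hugging Face Inference API are configured; the task id is reused from tools.py's test block and the question text is a placeholder. This is an illustration, not part of the commit.

```python
# Illustrative only: drive the manager agent built by agent.py's get_agent().
# Assumes OPENAI_API_KEY / HF_TOKEN are set; the question text is a placeholder.
from agent import get_agent

manager_agent = get_agent()

task_id = "cca530fc-4052-43b2-b130-b30968d8aa44"  # sample id from tools.py's test block
file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

# The manager delegates to Multimedia_Agent, which (per add_sys_prompt) now expects
# a direct file URL rather than a bare task_id.
answer = manager_agent.run(
    f"Task ID: {task_id}\n\n"
    f"IMPORTANT: If the question mentions an image, file, or other media, "
    f"construct the file URL using: {file_url}\n\n"
    "Question: Describe the attached file in one word."
)
print(answer)
```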
    	
app.py CHANGED

@@ -29,11 +29,13 @@ class BasicAgent:
 
         # If task_id is provided, we'll include context about possible files
         if task_id:
+            # API base URL for constructing file URLs
+            api_base_url = "https://agents-course-unit4-scoring.hf.space"
+
             # Add context about files to the question
             context = f"""Task ID: {task_id}
 
-If 
-Example: fetch_task_files(task_id="{task_id}")
+IMPORTANT: If the question mentions an image, file, or other media, construct the file URL using: {api_base_url}/files/{task_id}
 
 Question: {question}"""
 
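To make the change above concrete, the following standalone sketch reproduces the prompt that BasicAgent now assembles when a task_id is present. build_context is a hypothetical helper written only for this illustration; in app.py the f-string lives inline inside BasicAgent.

```python
# Hypothetical helper mirroring the inline f-string from app.py's BasicAgent;
# it exists only to show the prompt text the agent receives.
def build_context(task_id: str, question: str) -> str:
    # API base URL for constructing file URLs (value taken from the diff above)
    api_base_url = "https://agents-course-unit4-scoring.hf.space"
    return f"""Task ID: {task_id}

IMPORTANT: If the question mentions an image, file, or other media, construct the file URL using: {api_base_url}/files/{task_id}

Question: {question}"""


if __name__ == "__main__":
    # Placeholder task id and question, purely for illustration
    print(build_context("99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3", "What is said in the recording?"))
```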
    	
tools.py CHANGED

@@ -549,18 +549,18 @@ def process_binary(response, filename, content_type):
 
 
 @tool
-def fetch_task_files(task_id: str) -> Dict[str, Any]:
+def fetch_task_files(url: str) -> Dict[str, Any]:
     """
-    Download files 
+    Download files from a given URL.
 
     Args:
-
+        url (str): Direct URL to the file to download.
 
     Returns:
         dict: A dictionary containing file information and data in appropriate format for the file type
     """
-
-
+    files_url = url
+    print(f"Fetching file from: {files_url}")
 
     try:
         response = requests.get(files_url, timeout=15)
@@ -572,7 +572,7 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
         if "filename=" in filename:
             filename = filename.split("filename=")[-1].strip('"')
         else:
-            filename = 
+            filename = "file.bin"  # Default filename
 
         print(f"Received file: {filename}, type: {content_type}")
 
@@ -620,10 +620,10 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
             return process_binary(response, filename, content_type)
 
     except requests.exceptions.RequestException as e:
-        print(f"Error fetching 
+        print(f"Error fetching url: {files_url} - {e}")
         return {"error": f"Error fetching files: {e}"}
     except Exception as e:
-        print(f"An unexpected error occurred fetching files 
+        print(f"An unexpected error occurred fetching files from url: {files_url}- {e}")
         return {"error": f"An unexpected error occurred: {e}"}
 
 
@@ -652,21 +652,23 @@ def search_wikipedia(query: str) -> str:
 
 
 if __name__ == "__main__":
-    # Simple test for fetch_task_files
-
-
-    "
-    "
+    # Simple test for fetch_task_files with direct URLs
+    api_base = "https://agents-course-unit4-scoring.hf.space"
+    test_urls = [
+        f"{api_base}/files/cca530fc-4052-43b2-b130-b30968d8aa44",
+        f"{api_base}/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
+        f"{api_base}/files/7bd855d8-463d-4ed5-93ca-5fe35145f733",
     ]
-
+
+    for url in test_urls:
         print(
             "=" * 20
             + " "
-            + f"Testing fetch_task_files with 
+            + f"Testing fetch_task_files with URL: {url}"
             + " "
             + "=" * 20
        )
 
-        result = fetch_task_files(
+        result = fetch_task_files(url)
         print(f"File type: {result.get('file_type')}")
         print(f"Filename: {result.get('filename')}")
 
			
