Kai Jennissen committed: added tools
agent.py CHANGED

@@ -3,7 +3,7 @@ from smolagents import (
     CodeAgent,
     DuckDuckGoSearchTool,
     VisitWebpageTool,
-
+    InferenceClientModel,
     OpenAIServerModel,
     WikipediaSearchTool,
 )
@@ -79,12 +79,26 @@ if Text('Accept cookies?').exists():
 ```<end_code>
 """
 
-add_sys_prompt = """\n\
-
-
-
-
-
+add_sys_prompt = """\n\nWhen processing tasks with files:
+
+1. Use the fetch_task_files tool with the URL provided to you to download and process files
+2. Depending on the file type returned, use the appropriate specialized tool:
+   - For images: Use the data_url returned with read_image tool
+   - For audio: Use the audio data with transcribe_audio tool
+   - For code files: Use read_code tool
+   - For videos: Use run_video tool
+
+3. When handling different file types:
+   - Images: The fetch_task_files tool will return a data_url you can use directly with read_image
+   - Code: Do not execute code files, analyze them as text
+   - Tabular data (CSV, Excel): Use pandas to analyze the data
+   - Videos: Extract relevant information from visual frames and audio
+
+4. Keep answers concise and to the point. The answer is likely as simple as one word.
+5. Make sure you provide the answer in accordance with the instruction provided in the question.
+6. Do not return the raw result of tool calls as your final answer.
+7. Do not add any additional information, explanation, unnecessary words or symbols.
+"""
 
 
 def initialize_tracing(enabled=True, provider="langfuse"):
@@ -134,19 +148,24 @@ def get_agent():
         description="A web agent that can search the web and visit webpages.",
         verbosity_level=1,
     )
+
     mm_agent = CodeAgent(
         tools=[
+            fetch_task_files,
             read_image,
             transcribe_audio,
             read_code,
             run_video,
         ],
-        model=
+        model=InferenceClientModel(
+            model_id="Qwen/Qwen2.5-VL-32B-Instruct",  # provider="together"
+        ),
         max_steps=3,
         name="Multimedia_Agent",
-        description="An agent that can 
+        description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
         verbosity_level=1,
     )
+    mm_agent.prompt_templates["system_prompt"] += add_sys_prompt
 
     # Initialize the model
     # vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
@@ -168,16 +187,15 @@ def get_agent():
     # Import helium for the agent
     # Create manager agent
     manager_agent = CodeAgent(
-        tools=[
-        managed_agents=[
+        tools=[],
+        managed_agents=[mm_agent, web_agent],
         model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
         max_steps=5,
         planning_interval=10,
         additional_authorized_imports=["pandas", "numpy"],
-        verbosity_level=
+        verbosity_level=2,
    )
 
-    manager_agent.prompt_templates["system_prompt"] += add_sys_prompt
     return manager_agent
 
 
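For context, a minimal sketch of how the reworked agent hierarchy from this diff could be driven end to end. It assumes agent.py exposes get_agent() as shown above and that credentials for gpt-4.1 and the Hugging Face Inference API are configured; the task id is reused from tools.py's test block and the question text is a placeholder. This is an illustration, not part of the commit.

```python
# Illustrative only: drive the manager agent built by agent.py's get_agent().
# Assumes OPENAI_API_KEY / HF_TOKEN are set; the question text is a placeholder.
from agent import get_agent

manager_agent = get_agent()

task_id = "cca530fc-4052-43b2-b130-b30968d8aa44"  # sample id from tools.py's test block
file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

# The manager delegates to Multimedia_Agent, which (per add_sys_prompt) now expects
# a direct file URL rather than a bare task_id.
answer = manager_agent.run(
    f"Task ID: {task_id}\n\n"
    f"IMPORTANT: If the question mentions an image, file, or other media, "
    f"construct the file URL using: {file_url}\n\n"
    "Question: Describe the attached file in one word."
)
print(answer)
```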
    	
app.py CHANGED

@@ -29,11 +29,13 @@ class BasicAgent:
 
         # If task_id is provided, we'll include context about possible files
         if task_id:
+            # API base URL for constructing file URLs
+            api_base_url = "https://agents-course-unit4-scoring.hf.space"
+
             # Add context about files to the question
             context = f"""Task ID: {task_id}
 
-If 
-Example: fetch_task_files(task_id="{task_id}")
+IMPORTANT: If the question mentions an image, file, or other media, construct the file URL using: {api_base_url}/files/{task_id}
 
 Question: {question}"""
 
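To make the change above concrete, the following standalone sketch reproduces the prompt that BasicAgent now assembles when a task_id is present. build_context is a hypothetical helper written only for this illustration; in app.py the f-string lives inline inside BasicAgent.

```python
# Hypothetical helper mirroring the inline f-string from app.py's BasicAgent;
# it exists only to show the prompt text the agent receives.
def build_context(task_id: str, question: str) -> str:
    # API base URL for constructing file URLs (value taken from the diff above)
    api_base_url = "https://agents-course-unit4-scoring.hf.space"
    return f"""Task ID: {task_id}

IMPORTANT: If the question mentions an image, file, or other media, construct the file URL using: {api_base_url}/files/{task_id}

Question: {question}"""


if __name__ == "__main__":
    # Placeholder task id and question, purely for illustration
    print(build_context("99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3", "What is said in the recording?"))
```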
    	
tools.py CHANGED

@@ -549,18 +549,18 @@ def process_binary(response, filename, content_type):
 
 
 @tool
-def fetch_task_files(task_id: str) -> Dict[str, Any]:
+def fetch_task_files(url: str) -> Dict[str, Any]:
     """
-    Download files 
+    Download files from a given URL.
 
     Args:
-
+        url (str): Direct URL to the file to download.
 
     Returns:
         dict: A dictionary containing file information and data in appropriate format for the file type
     """
-
-
+    files_url = url
+    print(f"Fetching file from: {files_url}")
 
     try:
         response = requests.get(files_url, timeout=15)
@@ -572,7 +572,7 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
         if "filename=" in filename:
             filename = filename.split("filename=")[-1].strip('"')
         else:
-            filename = 
+            filename = "file.bin"  # Default filename
 
         print(f"Received file: {filename}, type: {content_type}")
 
@@ -620,10 +620,10 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
             return process_binary(response, filename, content_type)
 
     except requests.exceptions.RequestException as e:
-        print(f"Error fetching 
+        print(f"Error fetching url: {files_url} - {e}")
         return {"error": f"Error fetching files: {e}"}
     except Exception as e:
-        print(f"An unexpected error occurred fetching files 
+        print(f"An unexpected error occurred fetching files from url: {files_url}- {e}")
         return {"error": f"An unexpected error occurred: {e}"}
 
 
@@ -652,21 +652,23 @@ def search_wikipedia(query: str) -> str:
 
 
 if __name__ == "__main__":
-    # Simple test for fetch_task_files
-
-
-    "
-    "
+    # Simple test for fetch_task_files with direct URLs
+    api_base = "https://agents-course-unit4-scoring.hf.space"
+    test_urls = [
+        f"{api_base}/files/cca530fc-4052-43b2-b130-b30968d8aa44",
+        f"{api_base}/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
+        f"{api_base}/files/7bd855d8-463d-4ed5-93ca-5fe35145f733",
     ]
-
+
+    for url in test_urls:
         print(
             "=" * 20
             + " "
-            + f"Testing fetch_task_files with 
+            + f"Testing fetch_task_files with URL: {url}"
             + " "
             + "=" * 20
        )
 
-        result = fetch_task_files(
+        result = fetch_task_files(url)
         print(f"File type: {result.get('file_type')}")
         print(f"Filename: {result.get('filename')}")
 
			
