jesusgj committed · Commit 07e3a65 · 1 Parent(s): 9fb8366
Modified files:
- agent.py +106 -45
- requirements.txt +2 -1
agent.py
CHANGED
@@ -2,15 +2,20 @@ import os
 import re
 import requests
 import serpapi
+import time
 from smolagents import CodeAgent, ToolCallingAgent, WebSearchTool, tool
 from smolagents import InferenceClientModel
 from dotenv import load_dotenv
 from markdownify import markdownify
 from requests.exceptions import RequestException
 from llama_index.core import VectorStoreIndex, download_loader
+from llama_index.core.schema import Document
+from youtube_transcript_api import YouTubeTranscriptApi
 
 search_cache = {}
 webpage_cache = {}
+MAX_RETRIES = 3
+INITIAL_DELAY = 1  # seconds
 
 def initialize_agent():
     # Load environment variables from .env file
@@ -18,7 +23,7 @@ def initialize_agent():
 
     # 1. Load the model
     # Make sure to set HF_TOKEN in your Hugging Face Space secrets
-    model_name = "mistralai/Mixtral-
+    model_name = "mistralai/Mixtral-8x22B-Instruct-v0.1"
     try:
         model = InferenceClientModel(model_id=model_name, token=os.environ.get("HF_TOKEN"), provider="together")
     except Exception as e:
@@ -39,18 +44,59 @@ def initialize_agent():
         """
         if (url, query) in webpage_cache:
             return webpage_cache[(url, query)]
-        … (previous body of query_webpage; not captured in this view)
+
+        for i in range(MAX_RETRIES):
+            try:
+                BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
+                loader = BeautifulSoupWebReader()
+                documents = loader.load_data(urls=[url])
+                index = VectorStoreIndex.from_documents(documents)
+                query_engine = index.as_query_engine()
+                response = query_engine.query(query)
+                webpage_cache[(url, query)] = str(response)
+                return str(response)
+
+            except Exception as e:
+                if i < MAX_RETRIES - 1:
+                    delay = INITIAL_DELAY * (2 ** i)
+                    print(f"Error querying webpage: {str(e)}. Retrying in {delay} seconds...")
+                    time.sleep(delay)
+                else:
+                    return f"An unexpected error occurred after multiple retries: {str(e)}"
+
+    @tool
+    def query_youtube_video(video_id: str, query: str) -> str:
+        """Queries a YouTube video's transcript to find specific information and returns a concise answer.
+
+        Args:
+            video_id: The ID of the YouTube video.
+            query: The specific question to ask about the content of the video transcript.
+
+        Returns:
+            A concise answer to the query based on the video transcript, or an error message.
+        """
+        if (video_id, query) in webpage_cache:  # Using webpage_cache for simplicity
+            return webpage_cache[(video_id, query)]
+
+        for i in range(MAX_RETRIES):
+            try:
+                transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+                transcript_text = " ".join([t['text'] for t in transcript_list])
+
+                documents = [Document(text=transcript_text)]
+                index = VectorStoreIndex.from_documents(documents)
+                query_engine = index.as_query_engine()
+                response = query_engine.query(query)
+                webpage_cache[(video_id, query)] = str(response)
+                return str(response)
+
+            except Exception as e:
+                if i < MAX_RETRIES - 1:
+                    delay = INITIAL_DELAY * (2 ** i)
+                    print(f"Error querying YouTube video: {str(e)}. Retrying in {delay} seconds...")
+                    time.sleep(delay)
+                else:
+                    return f"An unexpected error occurred after multiple retries: {str(e)}"
 
     @tool
     def google_search(query: str) -> str:
@@ -64,40 +110,47 @@ def initialize_agent():
         """
         if query in search_cache:
             return search_cache[query]
-        … (previous body of google_search; not captured in this view beyond the fragment "output +=")
+
+        for i in range(MAX_RETRIES):
+            try:
+                client = serpapi.Client(api_key=os.environ.get("SERPAPI_API_KEY"))
+                results = client.search(q=query, engine="google")
+                if "ai_overview" in results:
+                    ai_overview = results["ai_overview"]
+                    output = ""
+                    for block in ai_overview.get("text_blocks", []):
+                        if block["type"] == "paragraph":
+                            output += block["snippet"] + "\n\n"
+                        elif block["type"] == "heading":
+                            output += f"### {block['snippet']}\n\n"
+                        elif block["type"] == "list":
+                            for item in block["list"]:
+                                output += f"- **{item['title']}** {item['snippet']}\n"
+                            output += "\n"
+                    if "references" in ai_overview:
+                        output += "\n**References:**\n"
+                        for ref in ai_overview["references"]:
+                            output += f"- [{ref['title']}]({ref['link']})\n"
+                    search_cache[query] = output
+                    return output
+                elif "organic_results" in results:
+                    result = str(results["organic_results"])
+                    search_cache[query] = result
+                    return result
+                else:
+                    return "No results found."
+            except Exception as e:
+                if i < MAX_RETRIES - 1:
+                    delay = INITIAL_DELAY * (2 ** i)
+                    print(f"Error performing Google search: {str(e)}. Retrying in {delay} seconds...")
+                    time.sleep(delay)
+                else:
+                    return f"Error performing Google search after multiple retries: {str(e)}"
 
     # 3. Define the agents
     if model:
         web_agent = ToolCallingAgent(
-            tools=[WebSearchTool(), query_webpage, google_search],
+            tools=[WebSearchTool(), query_webpage, query_youtube_video, google_search],
             model=model,
             max_steps=10,
             name="web_search_agent",
@@ -108,8 +161,16 @@ def initialize_agent():
             tools=[],
             model=model,
             managed_agents=[web_agent],
-            additional_authorized_imports=["time", "numpy", "pandas", "requests", "serpapi", "llama_index", "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse"],
-            instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.'''
+            additional_authorized_imports=["time", "numpy", "pandas", "requests", "serpapi", "llama_index", "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse", "youtube_transcript_api"],
+            instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+
+            To achieve the best results, follow these steps:
+            1. **Understand the Question:** Carefully read and analyze the user's question to identify the core task and any specific constraints (e.g., format, type of answer).
+            2. **Formulate a Plan:** Based on the question, devise a step-by-step plan. This might involve using web search, querying webpages, or analyzing YouTube videos. Consider what information is needed and which tool is best suited to obtain it.
+            3. **Execute Tools:** Use the available tools (WebSearchTool, query_webpage, query_youtube_video, google_search) to gather the necessary information. Be mindful of rate limits and use caching effectively.
+            4. **Synthesize Information:** Combine and process the information obtained from the tools to formulate a comprehensive answer. If the question requires specific data extraction, ensure accuracy.
+            5. **Format the Final Answer:** Adhere strictly to the specified FINAL ANSWER template. Ensure the answer type (number, string, comma-separated list) matches the question's requirement.
+            6. **Self-Correction:** If initial attempts fail or produce unsatisfactory results, re-evaluate the plan and try alternative approaches or tools.'''
         )
         return manager_agent
     else:
requirements.txt
CHANGED
@@ -7,4 +7,5 @@ markdownify
 duckduckgo-search
 wikipedia
 serpapi
-llama-index
+llama-index
+youtube-transcript-api
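As a quick check after installing the updated requirements, the new youtube-transcript-api dependency can be exercised on its own with the same get_transcript call agent.py relies on; the video ID below is only an example.

```python
# Hypothetical smoke test for the youtube-transcript-api dependency (not part of this commit).
from youtube_transcript_api import YouTubeTranscriptApi

# get_transcript returns a list of {"text", "start", "duration"} dicts,
# which agent.py joins into a single string before indexing it.
segments = YouTubeTranscriptApi.get_transcript("dQw4w9WgXcQ")  # example video ID
print(" ".join(seg["text"] for seg in segments)[:200])
```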