Spaces:

jlgaralc
/

Agent_Agents_Course

Sleeping

App Files Community

jesusgj committed on Jun 28

Commit

9fb8366

1 Parent(s): a57640e

Modified files

Browse files

Files changed (2) hide show

agent.py +19 -25
requirements.txt +2 -1

agent.py CHANGED Viewed

@@ -7,6 +7,7 @@ from smolagents import InferenceClientModel
 from dotenv import load_dotenv
 from markdownify import markdownify
 from requests.exceptions import RequestException
 search_cache = {}
 webpage_cache = {}
@@ -26,35 +27,28 @@ def initialize_agent():
     # 2. Define the tools
     @tool
-    def visit_webpage(url: str) -> str:
-        """Visits a webpage at the given URL and returns its content as a markdown string.
         Args:
-            url: The URL of the webpage to visit.
         Returns:
-            The content of the webpage converted to Markdown, or an error message if the request fails.
         """
-        if url in webpage_cache:
-            return webpage_cache[url]
         try:
-            # Send a GET request to the URL
-            response = requests.get(url)
-            response.raise_for_status()  # Raise an exception for bad status codes
-            # Convert the HTML content to Markdown
-            markdown_content = markdownify(response.text).strip()
-            # Remove multiple line breaks
-            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-            # Truncate the content to a maximum of 15000 characters
-            result = markdown_content[:15000]
-            webpage_cache[url] = result
-            return result
-        except RequestException as e:
-            return f"Error fetching the webpage: {str(e)}"
         except Exception as e:
             return f"An unexpected error occurred: {str(e)}"
@@ -103,7 +97,7 @@ def initialize_agent():
     # 3. Define the agents
     if model:
         web_agent = ToolCallingAgent(
-            tools=[WebSearchTool(), visit_webpage, google_search],
             model=model,
             max_steps=10,
             name="web_search_agent",
@@ -114,9 +108,9 @@ def initialize_agent():
             tools=[],
             model=model,
             managed_agents=[web_agent],
-            additional_authorized_imports=["time", "numpy", "pandas"],
             instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.'''
         )
         return manager_agent
     else:
-        return None

 from dotenv import load_dotenv
 from markdownify import markdownify
 from requests.exceptions import RequestException
+from llama_index.core import VectorStoreIndex, download_loader
 search_cache = {}
 webpage_cache = {}
     # 2. Define the tools
     @tool
+    def query_webpage(url: str, query: str) -> str:
+        """Queries a webpage at the given URL to find specific information and returns a concise answer.
         Args:
+            url: The URL of the webpage to query.
+            query: The specific question to ask about the content of the webpage.
         Returns:
+            A concise answer to the query based on the webpage's content, or an error message.
         """
+        if (url, query) in webpage_cache:
+            return webpage_cache[(url, query)]
         try:
+            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
+            loader = BeautifulSoupWebReader()
+            documents = loader.load_data(urls=[url])
+            index = VectorStoreIndex.from_documents(documents)
+            query_engine = index.as_query_engine()
+            response = query_engine.query(query)
+            webpage_cache[(url, query)] = str(response)
+            return str(response)
         except Exception as e:
             return f"An unexpected error occurred: {str(e)}"
     # 3. Define the agents
     if model:
         web_agent = ToolCallingAgent(
+            tools=[WebSearchTool(), query_webpage, google_search],
             model=model,
             max_steps=10,
             name="web_search_agent",
             tools=[],
             model=model,
             managed_agents=[web_agent],
+            additional_authorized_imports=["time", "numpy", "pandas", "requests", "serpapi", "llama_index", "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse"],
             instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.'''
         )
         return manager_agent
     else:
+        return None

requirements.txt CHANGED Viewed

@@ -6,4 +6,5 @@ gradio
 markdownify
 duckduckgo-search
 wikipedia
-serpapi

 markdownify
 duckduckgo-search
 wikipedia
+serpapi
+llama-index