jesusgj committed on
Commit
9fb8366
·
1 Parent(s): a57640e

Modified files

Browse files
Files changed (2) hide show
  1. agent.py +19 -25
  2. requirements.txt +2 -1
agent.py CHANGED
@@ -7,6 +7,7 @@ from smolagents import InferenceClientModel
7
  from dotenv import load_dotenv
8
  from markdownify import markdownify
9
  from requests.exceptions import RequestException
 
10
 
11
  search_cache = {}
12
  webpage_cache = {}
@@ -26,35 +27,28 @@ def initialize_agent():
26
 
27
  # 2. Define the tools
28
  @tool
29
- def visit_webpage(url: str) -> str:
30
- """Visits a webpage at the given URL and returns its content as a markdown string.
31
 
32
  Args:
33
- url: The URL of the webpage to visit.
 
34
 
35
  Returns:
36
- The content of the webpage converted to Markdown, or an error message if the request fails.
37
  """
38
- if url in webpage_cache:
39
- return webpage_cache[url]
40
  try:
41
- # Send a GET request to the URL
42
- response = requests.get(url)
43
- response.raise_for_status() # Raise an exception for bad status codes
 
 
 
 
 
44
 
45
- # Convert the HTML content to Markdown
46
- markdown_content = markdownify(response.text).strip()
47
-
48
- # Remove multiple line breaks
49
- markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
50
-
51
- # Truncate the content to a maximum of 15000 characters
52
- result = markdown_content[:15000]
53
- webpage_cache[url] = result
54
- return result
55
-
56
- except RequestException as e:
57
- return f"Error fetching the webpage: {str(e)}"
58
  except Exception as e:
59
  return f"An unexpected error occurred: {str(e)}"
60
 
@@ -103,7 +97,7 @@ def initialize_agent():
103
  # 3. Define the agents
104
  if model:
105
  web_agent = ToolCallingAgent(
106
- tools=[WebSearchTool(), visit_webpage, google_search],
107
  model=model,
108
  max_steps=10,
109
  name="web_search_agent",
@@ -114,9 +108,9 @@ def initialize_agent():
114
  tools=[],
115
  model=model,
116
  managed_agents=[web_agent],
117
- additional_authorized_imports=["time", "numpy", "pandas"],
118
  instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.'''
119
  )
120
  return manager_agent
121
  else:
122
- return None
 
7
  from dotenv import load_dotenv
8
  from markdownify import markdownify
9
  from requests.exceptions import RequestException
10
+ from llama_index.core import VectorStoreIndex, download_loader
11
 
12
  search_cache = {}
13
  webpage_cache = {}
 
27
 
28
  # 2. Define the tools
29
  @tool
30
+ def query_webpage(url: str, query: str) -> str:
31
+ """Queries a webpage at the given URL to find specific information and returns a concise answer.
32
 
33
  Args:
34
+ url: The URL of the webpage to query.
35
+ query: The specific question to ask about the content of the webpage.
36
 
37
  Returns:
38
+ A concise answer to the query based on the webpage's content, or an error message.
39
  """
40
+ if (url, query) in webpage_cache:
41
+ return webpage_cache[(url, query)]
42
  try:
43
+ BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
44
+ loader = BeautifulSoupWebReader()
45
+ documents = loader.load_data(urls=[url])
46
+ index = VectorStoreIndex.from_documents(documents)
47
+ query_engine = index.as_query_engine()
48
+ response = query_engine.query(query)
49
+ webpage_cache[(url, query)] = str(response)
50
+ return str(response)
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  except Exception as e:
53
  return f"An unexpected error occurred: {str(e)}"
54
 
 
97
  # 3. Define the agents
98
  if model:
99
  web_agent = ToolCallingAgent(
100
+ tools=[WebSearchTool(), query_webpage, google_search],
101
  model=model,
102
  max_steps=10,
103
  name="web_search_agent",
 
108
  tools=[],
109
  model=model,
110
  managed_agents=[web_agent],
111
+ additional_authorized_imports=["time", "numpy", "pandas", "requests", "serpapi", "llama_index", "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse"],
112
  instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.'''
113
  )
114
  return manager_agent
115
  else:
116
+ return None
requirements.txt CHANGED
@@ -6,4 +6,5 @@ gradio
6
  markdownify
7
  duckduckgo-search
8
  wikipedia
9
- serpapi
 
 
6
  markdownify
7
  duckduckgo-search
8
  wikipedia
9
+ serpapi
10
+ llama-index