Spaces:
Sleeping
Sleeping
File size: 5,163 Bytes
8cf27dc f92d478 8cf27dc 914cd84 8cf27dc 9fb8366 8cf27dc c2d4de7 8cf27dc a57640e 8cf27dc 73cf197 8cf27dc 9fb8366 8cf27dc 9fb8366 8cf27dc 9fb8366 8cf27dc 9fb8366 8cf27dc 9fb8366 8cf27dc 5291928 c2d4de7 5291928 f92d478 e812f22 c2d4de7 e812f22 c2d4de7 e812f22 5291928 8cf27dc 9fb8366 8cf27dc 9fb8366 a57640e 8cf27dc 9fb8366 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import os
import re
import requests
import serpapi
from smolagents import CodeAgent, ToolCallingAgent, WebSearchTool, tool
from smolagents import InferenceClientModel
from dotenv import load_dotenv
from markdownify import markdownify
from requests.exceptions import RequestException
from llama_index.core import VectorStoreIndex, download_loader
# Process-wide memoization stores used by the agent tools defined below.
# Keyed by the search query string; holds the result text returned for it.
search_cache: dict[str, str] = {}
# Keyed by the (url, query) pair; holds the answer text returned for it.
webpage_cache: dict[tuple[str, str], str] = {}
def _format_ai_overview(ai_overview):
    """Render a SerpApi ``ai_overview`` payload as Markdown text.

    Paragraph blocks become plain paragraphs, heading blocks become ``###``
    headings, list blocks become bullet lists and references become a
    trailing list of links. Missing fields are skipped instead of raising,
    so a partially populated payload still yields whatever text it carries.

    Args:
        ai_overview: The mapping stored under the ``"ai_overview"`` key of
            the search results.

    Returns:
        The rendered Markdown, or an empty string when the payload carries
        no renderable content.
    """
    parts = []
    for block in ai_overview.get("text_blocks") or []:
        kind = block.get("type")
        snippet = block.get("snippet", "")
        if kind == "paragraph":
            if snippet:
                parts.append(f"{snippet}\n\n")
        elif kind == "heading":
            if snippet:
                parts.append(f"### {snippet}\n\n")
        elif kind == "list":
            for item in block.get("list") or []:
                title = item.get("title")
                text = item.get("snippet", "")
                if title:
                    parts.append(f"- **{title}** {text}\n")
                elif text:
                    # List items are not guaranteed to carry a title.
                    parts.append(f"- {text}\n")
            parts.append("\n")

    references = ai_overview.get("references")
    if references:
        parts.append("\n**References:**\n")
        for ref in references:
            title = ref.get("title", "")
            link = ref.get("link")
            if link:
                parts.append(f"- [{title or link}]({link})\n")
            elif title:
                parts.append(f"- {title}\n")

    return "".join(parts)


def initialize_agent() -> "CodeAgent | None":
    """Build the manager agent together with its web-search sub-agent.

    Loads environment variables from ``.env``, creates the inference model,
    defines the ``query_webpage`` and ``google_search`` tools, and wires them
    into a ``ToolCallingAgent`` managed by a ``CodeAgent``.

    Returns:
        The manager ``CodeAgent``, or ``None`` when the model could not be
        created.
    """
    # Load environment variables from .env file
    load_dotenv()

    # 1. Load the model
    # Make sure to set HF_TOKEN in your Hugging Face Space secrets
    model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    try:
        model = InferenceClientModel(
            model_id=model_name,
            token=os.environ.get("HF_TOKEN"),
            provider="together",
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        model = None

    # 2. Define the tools
    # NOTE: the tool docstrings are kept verbatim; they are part of what the
    # agent is told about each tool.
    @tool
    def query_webpage(url: str, query: str) -> str:
        """Queries a webpage at the given URL to find specific information and returns a concise answer.
        Args:
            url: The URL of the webpage to query.
            query: The specific question to ask about the content of the webpage.
        Returns:
            A concise answer to the query based on the webpage's content, or an error message.
        """
        cache_key = (url, query)
        if cache_key in webpage_cache:
            return webpage_cache[cache_key]
        try:
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            documents = loader.load_data(urls=[url])
            index = VectorStoreIndex.from_documents(documents)
            query_engine = index.as_query_engine()
            answer = str(query_engine.query(query))
        except Exception as e:
            # Tool boundary: report the failure to the agent instead of
            # aborting its run. Failures are deliberately not cached.
            return f"An unexpected error occurred: {str(e)}"
        webpage_cache[cache_key] = answer
        return answer

    @tool
    def google_search(query: str) -> str:
        """Searches Google for the given query and returns the results.
        Args:
            query: The query to search for.
        Returns:
            The search results, or an error message if the search fails.
        """
        if query in search_cache:
            return search_cache[query]

        api_key = os.environ.get("SERPAPI_API_KEY")
        if not api_key:
            # Fail with a clear message rather than an opaque API error.
            return "Error performing Google search: SERPAPI_API_KEY is not set."

        try:
            client = serpapi.Client(api_key=api_key)
            results = client.search(q=query, engine="google")

            if "ai_overview" in results:
                overview = _format_ai_overview(results["ai_overview"] or {})
                # An overview without renderable text must not shadow the
                # organic results, so only return (and cache) it when it
                # actually contains something.
                if overview.strip():
                    search_cache[query] = overview
                    return overview

            if "organic_results" in results:
                organic = str(results["organic_results"])
                search_cache[query] = organic
                return organic

            return "No results found."
        except Exception as e:
            # Tool boundary: surface the failure as text for the agent.
            return f"Error performing Google search: {str(e)}"

    # 3. Define the agents
    if not model:
        return None

    web_agent = ToolCallingAgent(
        tools=[WebSearchTool(), query_webpage, google_search],
        model=model,
        max_steps=10,
        name="web_search_agent",
        description="Runs web searches for you.",
    )
    manager_agent = CodeAgent(
        tools=[],
        model=model,
        managed_agents=[web_agent],
        additional_authorized_imports=[
            "time",
            "numpy",
            "pandas",
            "requests",
            "serpapi",
            "llama_index",
            "beautifulsoup4",
            # "beautifulsoup4" is the distribution name; the importable
            # module is "bs4", so authorize that as well.
            "bs4",
            "markdownify",
            "lxml",
            "json",
            "urllib.parse",
        ],
        instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.''',
    )
    return manager_agent