import os
import re
import requests
import serpapi
from smolagents import CodeAgent, ToolCallingAgent, WebSearchTool, tool
from smolagents import InferenceClientModel
from dotenv import load_dotenv
from markdownify import markdownify
from requests.exceptions import RequestException
from llama_index.core import VectorStoreIndex, download_loader

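# Simple in-memory caches so repeated searches and webpage queries within a
# session don't hit the external APIs a second time.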
search_cache = {}
webpage_cache = {}

def initialize_agent():
    # Load environment variables from .env file
    load_dotenv()

    # 1. Load the model
    # Make sure to set HF_TOKEN in your Hugging Face Space secrets
    model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    try:
        model = InferenceClientModel(model_id=model_name, token=os.environ.get("HF_TOKEN"), provider="together")
    except Exception as e:
        print(f"Error loading model: {e}")
        model = None

    # 2. Define the tools
    @tool
    def query_webpage(url: str, query: str) -> str:
        """Queries a webpage at the given URL to find specific information and returns a concise answer.

        Args:
            url: The URL of the webpage to query.
            query: The specific question to ask about the content of the webpage.

        Returns:
            A concise answer to the query based on the webpage's content, or an error message.
        """
        if (url, query) in webpage_cache:
            return webpage_cache[(url, query)]
        try:
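            # Note: download_loader is deprecated in recent llama-index releases;
            # BeautifulSoupWebReader can alternatively be imported from
            # llama_index.readers.web (llama-index-readers-web package).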
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            documents = loader.load_data(urls=[url])
            index = VectorStoreIndex.from_documents(documents)
            query_engine = index.as_query_engine()
            response = query_engine.query(query)
            webpage_cache[(url, query)] = str(response)
            return str(response)

        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"

    @tool
    def google_search(query: str) -> str:
        """Searches Google for the given query and returns the results.

        Args:
            query: The query to search for.

        Returns:
            The search results, or an error message if the search fails.
        """
        if query in search_cache:
            return search_cache[query]
        try:
            client = serpapi.Client(api_key=os.environ.get("SERPAPI_API_KEY"))
            results = client.search(q=query, engine="google")
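            # Prefer Google's AI Overview block when SerpAPI returns one,
            # rendering its text blocks and references as Markdown; otherwise
            # fall back to the raw organic results.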
            if "ai_overview" in results:
                ai_overview = results["ai_overview"]
                output = ""
                for block in ai_overview.get("text_blocks", []):
                    if block["type"] == "paragraph":
                        output += block["snippet"] + "\n\n"
                    elif block["type"] == "heading":
                        output += f"### {block['snippet']}\n\n"
                    elif block["type"] == "list":
                        for item in block["list"]:
                            output += f"- **{item['title']}** {item['snippet']}\n"
                        output += "\n"
                if "references" in ai_overview:
                    output += "\n**References:**\n"
                    for ref in ai_overview["references"]:
                        output += f"- [{ref['title']}]({ref['link']})\n"
                search_cache[query] = output
                return output
            elif "organic_results" in results:
                result = str(results["organic_results"])
                search_cache[query] = result
                return result
            else:
                return "No results found."
        except Exception as e:
            return f"Error performing Google search: {str(e)}"

    # 3. Define the agents
    if model:
        web_agent = ToolCallingAgent(
            tools=[WebSearchTool(), query_webpage, google_search],
            model=model,
            max_steps=10,
            name="web_search_agent",
            description="Runs web searches for you.",
        )

        manager_agent = CodeAgent(
            tools=[],
            model=model,
            managed_agents=[web_agent],
            # Python module names the CodeAgent may import in generated code
            # (note: the importable module for beautifulsoup4 is "bs4").
            additional_authorized_imports=[
                "time", "numpy", "pandas", "requests", "serpapi", "llama_index",
                "bs4", "markdownify", "lxml", "json", "urllib.parse",
            ],
            instructions=(
                "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish "
                "your answer with a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. "
                "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of "
                "numbers and/or strings. If you are asked for a number, don't use commas to write your number, "
                "and don't use units such as $ or percent signs unless specified otherwise. If you are asked "
                "for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain "
                "text unless specified otherwise. If you are asked for a comma-separated list, apply the above "
                "rules depending on whether each element of the list is a number or a string."
            ),
        )
        return manager_agent
    else:
        return None
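

# Minimal usage sketch (an assumption, not part of the original app wiring):
# it requires HF_TOKEN and SERPAPI_API_KEY to be available in the environment
# or a local .env file, and the question below is only a placeholder.
if __name__ == "__main__":
    agent = initialize_agent()
    if agent is None:
        print("Agent could not be initialized; check that the model loaded correctly.")
    else:
        answer = agent.run("In what year was the Eiffel Tower completed?")
        print(answer)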