Final_Assignment_Template

Runtime error

App Files Files Community

xmuruaga committed on May 21

Commit

4f58ba4

verified ·

1 Parent(s): 81917a3

Upload 4 files

Browse files

Files changed (4) hide show

app.py +162 -19
tooling.py +131 -0
wikipedia_utils.py +52 -0
youtube_utils.py +24 -0

app.py CHANGED Viewed

@@ -3,6 +3,16 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
@@ -10,25 +20,145 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
-        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -55,16 +185,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -74,18 +204,31 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
@@ -172,10 +315,10 @@ with gr.Blocks() as demo:
     )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +326,14 @@ if __name__ == "__main__":
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import requests
 import inspect
 import pandas as pd
+from smolagents import DuckDuckGoSearchTool,GoogleSearchTool, HfApiModel, PythonInterpreterTool, VisitWebpageTool, CodeAgent,Tool, LiteLLMModel
+import hashlib
+import json
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TransformersEngine
+import wikipedia
+from tooling import WikipediaPageFetcher,MathModelQuerer, YoutubeTranscriptFetcher, CodeModelQuerer
+from langchain_community.agent_toolkits.load_tools import load_tools
+import time
+import torch
 # (Keep Constants as is)
 # --- Constants ---
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+cache = {}
+class WebSearchTool(DuckDuckGoSearchTool):
+    """DuckDuckGo search tool registered under the name ``web_search_ddg``."""
+    # Only the tool metadata is set here; all behavior is inherited from
+    # DuckDuckGoSearchTool.
+    # NOTE(review): presumably renamed to avoid clashing with the base web-search
+    # tool added by ``add_base_tools=True`` — confirm.
+    name = "web_search_ddg"
+    description = "Search the web using DuckDuckGo"
+web_search_ddf = WebSearchTool()
+google_search = GoogleSearchTool(provider="serper")
+# Interpreter tool handed to the agent, with the modules its generated code may import.
+# NOTE(review): 'os', 'sys', 'io', 'pathlib' and 'glob' expose the host filesystem
+# and process to model-generated code — confirm this is acceptable where the app runs.
+python_interpreter = PythonInterpreterTool(authorized_imports = [
+    # standard library
+    'os',                      # For file path manipulation, checking existence, deletion
+    'glob',                    # Find files matching specific patterns
+    'pathlib',                 # Alternative for path manipulation
+    'sys',
+    'math',
+    'random',
+    'datetime',
+    'time',
+    'json',
+    'csv',
+    're',
+    'collections',
+    'itertools',
+    'functools',
+    'io',
+    'base64',
+    'hashlib',
+    # Third-Party Libraries (ensure they are installed in the execution env)
+    'pandas',         # Data manipulation and analysis
+    'numpy',          # Numerical operations
+    'scipy',          # Scientific and technical computing (stats, optimize, etc.)
+    'sklearn',        # Machine learning
+])
+visit_webpage_tool = VisitWebpageTool()
+wiki_tool = WikipediaPageFetcher()
+yt_transcript_fetcher = YoutubeTranscriptFetcher()
+# math_model_querer = MathModelQuerer()
+# code_model_querer = CodeModelQuerer()
+# Batch of tools from LangChain. Credits: DataDiva88
+lc_ddg_search = Tool.from_langchain(load_tools(["ddg-search"])[0])
+lc_wikipedia = Tool.from_langchain(load_tools(["wikipedia"])[0])
+lc_arxiv = Tool.from_langchain(load_tools(["arxiv"])[0])
+lc_pubmed = Tool.from_langchain(load_tools(["pubmed"])[0])
+lc_stackechange = Tool.from_langchain(load_tools(["stackexchange"])[0])
+def load_cached_answer(question_id: str) -> str | None:
+    """Return the answer cached for ``question_id``, or ``None`` when there is none."""
+    # dict.get performs the membership test and the lookup in a single step.
+    return cache.get(question_id)
+def cache_answer(question_id: str, answer: str) -> None:
+    """Store ``answer`` in the module-level ``cache`` dict under ``question_id``."""
+    # An existing entry for the same id is overwritten.
+    cache[question_id] = answer
+# --- Model Setup ---
+#MODEL_NAME = 'Qwen/Qwen2.5-3B-Instruct'  # 'meta-llama/Llama-3.2-3B-Instruct'
+# "Qwen/Qwen2.5-VL-3B-Instruct"#'meta-llama/Llama-2-7b-hf'#'meta-llama/Llama-3.1-8B-Instruct'#'TinyLlama/TinyLlama-1.1B-Chat-v1.0'#'mistralai/Mistral-7B-Instruct-v0.2'#'microsoft/DialoGPT-small'# 'EleutherAI/gpt-neo-2.7B'#'distilbert/distilgpt2'#'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'#'mistralai/Mistral-7B-Instruct-v0.2'
+def load_model(model_name):
+    """Download and load a causal LM and wrap it in a text-generation engine.
+
+    Args:
+        model_name: Model id or path passed to ``from_pretrained`` for both the
+            model and the tokenizer.
+
+    Returns:
+        A ``(transformers_engine, model)`` tuple, where the engine wraps a
+        ``text-generation`` pipeline built from the loaded model and tokenizer.
+
+    Raises:
+        Exception: Any error raised while loading is printed and re-raised.
+    """
+    try:
+        # Use the argument; the previous body read a global MODEL_NAME that is
+        # not defined anywhere, so every call failed with NameError.
+        print(f"Loading model {model_name}...")
+        model = AutoModelForCausalLM.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        # The tokenizer needs a pad token; fall back to the EOS token when none is set.
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        print(f"Model {model_name} loaded successfully.")
+        transformers_engine = TransformersEngine(pipeline("text-generation", model=model, tokenizer=tokenizer))
+        return transformers_engine, model
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        raise
+# Load the model and tokenizer locally
+# model, tokenizer = load_model()
+#model_id = "meta-llama/Llama-3.1-8B-Instruct"  # "microsoft/phi-2"# not working out of the box"google/gemma-2-2b-it" #toobig"Qwen/Qwen1.5-7B-Chat"#working but stupid: "meta-llama/Llama-3.2-3B-Instruct"
+# Hosted model used by the agent; the local-model alternatives below are kept for reference.
+model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=512)
+#from smolagents import TransformersModel
+# model = TransformersModel(
+#     model_id=model_id,
+#     max_new_tokens=256)
+# model = HfApiModel()
+# The LangChain tools (lc_ddg_search, lc_wikipedia, lc_arxiv, lc_pubmed) are
+# already built once earlier in this module, so they are not rebuilt here.
 class BasicAgent:
+    """Callable wrapper around a smolagents ``CodeAgent``.
+
+    The underlying agent is built once in ``__init__`` from the module-level
+    ``model`` and tool instances, and is reused for every question.
+    """
     def __init__(self):
+        """Create the ``CodeAgent`` with the search, browsing, Wikipedia and interpreter tools."""
         print("BasicAgent initialized.")
+        self.agent = CodeAgent(
+            model=model,
+            tools=[google_search,web_search_ddf, python_interpreter, visit_webpage_tool, wiki_tool,lc_wikipedia,lc_arxiv,lc_pubmed,lc_stackechange],
+            # NOTE(review): presumably caps each run at 10 steps with a planning
+            # pass every 3 steps — confirm against the smolagents documentation.
+            max_steps=10,
+            verbosity_level=1,
+            grammar=None,
+            planning_interval=3,
+            add_base_tools=True,
+            additional_authorized_imports=['requests', 'wikipedia', 'pandas','datetime']
+        )
     def __call__(self, question: str) -> str:
+        """Run the agent on ``question`` and return whatever ``CodeAgent.run`` returns."""
+        # Only the first 50 characters are logged to keep the console output short.
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        answer = self.agent.run(question)
+        return answer
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
     if profile:
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
+        time.sleep(60)
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            cached = load_cached_answer(task_id)
+            if cached:
+                submitted_answer = cached
+                print(f"Loaded cached answer for task {task_id}")
+            else:
+                submitted_answer = agent(question_text)
+                cache_answer(task_id, submitted_answer)
+                print(f"Generated and cached answer for task {task_id}")
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
     )
 if __name__ == "__main__":
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    print("-" * (60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)

tooling.py ADDED Viewed

	@@ -0,0 +1,131 @@

+from smolagents import Tool
+from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+import torch
+from wikipedia_utils import *
+from youtube_utils import *
+class MathModelQuerer(Tool):
+    """smolagents tool that sends a math problem to a locally loaded causal LM."""
+    name = "math_model"
+    description = "Solves advanced math problems using a pretrained\
+    large language model specialized in mathematics. Ideal for symbolic reasoning, \
+    calculus, algebra, and other technical math queries."
+    inputs = {
+        "problem": {
+            "type": "string",
+            "description": "Math problem to solve.",
+        }
+    }
+    output_type = "string"
+    def __init__(self, model_name="deepseek-ai/deepseek-math-7b-base"):
+        """Load the tokenizer, model and generation config for ``model_name``."""
+        # Let the Tool base class set up its own state before anything is loaded.
+        super().__init__()
+        print(f"Loading math model: {model_name}")
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        print("loaded tokenizer")
+        self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+        print("loaded auto model")
+        self.model.generation_config = GenerationConfig.from_pretrained(model_name)
+        print("loaded coonfig")
+        # Reuse the EOS token id as the pad token id for generation.
+        self.model.generation_config.pad_token_id = self.model.generation_config.eos_token_id
+        print("loaded pad token")
+    def forward(self, problem: str) -> str:
+        """Generate up to 100 new tokens for ``problem`` and return the decoded text.
+
+        Returns a fixed failure message instead of raising when generation fails.
+        """
+        try:
+            print(f"[MathModelTool] Question: {problem}")
+            inputs = self.tokenizer(problem, return_tensors="pt")
+            outputs = self.model.generate(**inputs, max_new_tokens=100)
+            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            return result
+        except Exception as e:
+            # Log the cause; the caller only sees the generic failure message.
+            print(f"[MathModelTool] Error: {e}")
+            return f"Failed using the tool {self.name}"
+class CodeModelQuerer(Tool):
+    """smolagents tool that asks a hosted code model to write code for a described task."""
+    name = "code_querer"
+    description = "Generates code snippets based on a natural language description of a\
+    programming task using a powerful coding-focused language model. Suitable\
+    for solving coding problems, generating functions, or implementing algorithms."
+    inputs = {
+        "problem": {
+            "type": "string",
+            "description": "Description of a code sample to be generated",
+        }
+    }
+    output_type = "string"
+    def __init__(self, model_name="Qwen/Qwen2.5-Coder-32B-Instruct"):
+        """Create the hosted model client for ``model_name``."""
+        from smolagents import HfApiModel
+        # Let the Tool base class set up its own state first.
+        super().__init__()
+        print(f"Loading llm for Code tool: {model_name}")
+        # Pass the requested model through; it was previously ignored.
+        self.model = HfApiModel(model_id=model_name)
+    def forward(self, problem: str) -> str:
+        """Return the model's output for ``problem``, or a failure message on error."""
+        try:
+            # NOTE(review): confirm that the installed smolagents version exposes
+            # ``generate`` and accepts a plain string rather than a message list.
+            return self.model.generate(problem, max_new_tokens=512)
+        except Exception as e:
+            # Log the cause; the caller only sees the generic failure message.
+            print(f"[{self.name}] Error: {e}")
+            return f"Failed using the tool {self.name}"
+class WikipediaPageFetcher(Tool):
+    """smolagents tool that returns the text of the Wikipedia article matching a query."""
+    name = "wiki_page_fetcher"
+    description =' Searches and fetches summaries from Wikipedia for any topic,\
+    across all supported languages and versions. Only a single query string is required as input.'
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "Topic of wikipedia search",
+        }
+    }
+    output_type = "string"
+    def forward(self, query: str) -> str:
+        """Extract a search subject from ``query`` and fetch the matching article text.
+
+        Returns the article content, an empty string when nothing matches, or a
+        fixed failure message when the lookup fails.
+        """
+        try:
+            # Reduce the query to its subject; previously the string ``query``
+            # itself was called as a function, so the tool always failed.
+            # Fall back to the raw query when no subject could be extracted.
+            wiki_query = get_wiki_query(query) or query
+            wiki_page = fetch_wikipedia_page(wiki_query)
+            if wiki_page is None:
+                # The helper signals a failed lookup with None.
+                return f"Failed using the tool {self.name}"
+            return wiki_page
+        except Exception as e:
+            # Log the cause; the caller only sees the generic failure message.
+            print(f"[{self.name}] Error: {e}")
+            return f"Failed using the tool {self.name}"
+class YoutubeTranscriptFetcher(Tool):
+    """smolagents tool that returns the English transcript of a YouTube video as text."""
+    name = "youtube_transcript_fetcher"
+    description ="Fetches the English transcript of a YouTube video using either a direct video \
+    ID or a URL that includes one. Accepts a query containing the link or the raw video ID directly. Returns the transcript as plain text."
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "A query that includes youtube id."
+        },
+        "video_id" : {
+            "type" : "string",
+            "description" : "Optional string with video id from youtube.",
+            "nullable"  : True
+        }
+    }
+    output_type = "string"
+    def forward(self, query: str, video_id=None) -> str:
+        """Fetch and flatten the transcript for ``video_id`` or the id found in ``query``.
+
+        Returns the transcript text, or a fixed failure message when no id is
+        found or the transcript cannot be fetched.
+        """
+        failure_message = f"Failed using the tool {self.name}"
+        try:
+            if video_id is None:
+                video_id = get_youtube_video_id(query)
+            if video_id is None:
+                # No id in the query: stop here instead of failing further down.
+                return failure_message
+            fetched_transcript = fetch_transcript_english(video_id)
+            if fetched_transcript is None:
+                # The helper signals a failed fetch with None.
+                return failure_message
+            return post_process_transcript(fetched_transcript)
+        except Exception as e:
+            # Log the cause; the caller only sees the generic failure message.
+            print(f"[{self.name}] Error: {e}")
+            return failure_message

wikipedia_utils.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import wikipedia
+import spacy
+def _load_spacy_model():
+    """Load the small English spaCy model once and reuse it on later calls."""
+    nlp = getattr(_load_spacy_model, "_nlp", None)
+    if nlp is None:
+        nlp = spacy.load("en_core_web_sm")
+        # Keep the loaded model on the function object so it is loaded only once.
+        _load_spacy_model._nlp = nlp
+    return nlp
+def get_wiki_query(query):
+    """Reduce a free-text question to a short subject suitable for a Wikipedia search.
+
+    Returns the named entities found in ``query`` joined by spaces; otherwise
+    the first noun or proper noun; otherwise ``query`` unchanged. Returns
+    ``None`` when parsing itself fails (the error is printed).
+    """
+    try:
+        # Parse the sentence
+        doc = _load_spacy_model()(query)
+        # Entity path (people, events, books)
+        entity_texts = [entity.text for entity in doc.ents]
+        if entity_texts:
+            # Separate entities with a space so distinct names do not run together.
+            return " ".join(entity_texts)
+        first_noun = next((token.text for token in doc if token.pos_ in {"NOUN", "PROPN"}), None)
+        if first_noun is not None:
+            print("Returning first noun from the query.")
+            return first_noun
+        # Nothing usable was extracted; search with the query as given.
+        print("Entity query not parsed.")
+        return query
+    except Exception as e:
+        print("Failed parsing a query subject from query", query)
+        print(e)
+        return None
+def fetch_wikipedia_page(wiki_query):
+    """Return the content of the first loadable Wikipedia article matching ``wiki_query``.
+
+    Search results are tried in order; a result that is a disambiguation page
+    or does not exist is skipped. Returns ``""`` when no result can be loaded
+    and ``None`` when the lookup itself fails (the error is printed).
+    """
+    try:
+        matched_articles = wikipedia.search(wiki_query)
+        for article_title in matched_articles:
+            try:
+                # auto_suggest=False loads exactly the title returned by the search.
+                return wikipedia.page(article_title, auto_suggest=False).content
+            except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError) as e:
+                # This hit is unusable; move on to the next search result.
+                print("Skipping wikipedia article ", article_title)
+                print(e)
+        return ""
+    except Exception as e:
+        print("Could not fetch the wikipedia article using ", wiki_query)
+        print(e)
+        return None

youtube_utils.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from youtube_transcript_api import YouTubeTranscriptApi
+import re
+def get_youtube_video_id(query):
+    """Extract an 11-character YouTube video id from ``query``.
+
+    Recognises ``youtu.be/<id>`` and ``youtube.com/`` URLs of the ``watch?v=``,
+    ``embed/``, ``v/`` and ``shorts/`` forms anywhere in the text, and a query
+    that consists only of a raw id. Returns the id, or ``None`` when nothing is
+    found or ``query`` is not a string.
+    """
+    if not isinstance(query, str):
+        print("Did not find youtube video id from query ", query)
+        return None
+    match = re.search(r'(?:youtu\.be/|youtube\.com/(?:watch\?v=|embed/|v/|shorts/))([\w-]{11})', query)
+    if match:
+        video_id = match.group(1)
+    elif re.fullmatch(r'[\w-]{11}', query.strip()):
+        # A bare id is accepted only when it is the whole query, so ordinary
+        # 11-letter words inside a sentence are not mistaken for ids.
+        video_id = query.strip()
+    else:
+        print("Did not find youtube video id from query ", query)
+        return None
+    print(video_id)
+    return video_id
+def fetch_transcript_english(video_id):
+    """Fetch the English transcript for ``video_id``.
+
+    Returns whatever ``YouTubeTranscriptApi().fetch`` returns, or ``None`` when
+    the fetch fails (the error is printed).
+    """
+    try:
+        ytt_api = YouTubeTranscriptApi()
+        return ytt_api.fetch(video_id,languages=['en'])
+    except Exception as e:
+        # Report which video failed and why instead of a bare "Error".
+        print(f"Error fetching transcript for video {video_id}: {e}")
+        return None
+def post_process_transcript(transcript_snippets):
+    """Flatten transcript snippets into one string of their ``text`` values joined by spaces."""
+    snippet_texts = (snippet.text for snippet in transcript_snippets)
+    return " ".join(snippet_texts)