File size: 2,718 Bytes
903af12
8cf27dc
 
903af12
8cf27dc
 
903af12
8cf27dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
import requests
from agent import initialize_agent

# 1. Initialize the agent
# Built once at import time and shared by every evaluation run.
# run_evaluation() treats a falsy value here as "initialization failed".
manager_agent = initialize_agent()

# 2. Define the API URL
# Base URL of the benchmark service; run_evaluation() appends "/questions"
# (GET) and "/submit" (POST) to it.
API_URL = "https://gaia-benchmark-dev.hf.space"

# 3. Gradio interface
def run_evaluation(hf_username, space_url):
    """Fetch the benchmark questions, answer them with the agent, and submit.

    Args:
        hf_username: Hugging Face username included in the submission payload.
        space_url: Space URL included in the submission payload.

    Returns:
        The decoded JSON body of the submission response on success.
        On any failure (agent unavailable, missing inputs, HTTP or other
        errors) a dict of the form ``{"error": "<message>"}``. A dict is
        returned rather than a bare string because the result is rendered
        by a ``gr.JSON`` component, which tries to parse string values as
        JSON text and would fail on a plain-English message.
    """
    # Seconds to wait on each HTTP call; without a timeout, requests blocks
    # indefinitely if the benchmark server stops responding.
    request_timeout = 60

    if not manager_agent:
        return {"error": "Agent could not be initialized. Please check the model and token."}
    if not hf_username or not space_url:
        return {"error": "Please provide your Hugging Face username and Space URL."}

    try:
        # Get questions
        print("Fetching questions...")
        response = requests.get(f"{API_URL}/questions", timeout=request_timeout)
        response.raise_for_status()
        questions = response.json()
        print(f"Found {len(questions)} questions.")

        # Run agent on questions
        answers = []
        for i, question in enumerate(questions):
            prompt = question["question"]
            print(f"Answering question {i+1}/{len(questions)}: {prompt}")
            try:
                answer = manager_agent.run(prompt)
                answers.append({"question_id": question["id"], "answer": answer})
                print(f"Got answer: {answer}")
            except Exception as e:
                # Best effort: one failing question must not abort the whole
                # run, so record a placeholder answer and keep going.
                print(f"Error running agent on question {question['id']}: {e}")
                answers.append({"question_id": question["id"], "answer": "Error: Could not generate an answer."})

        # Submit answers
        print("Submitting answers...")
        submission_response = requests.post(
            f"{API_URL}/submit",
            json={
                "hf_username": hf_username,
                "answers": answers,
                "space_url": space_url
            },
            timeout=request_timeout,
        )
        submission_response.raise_for_status()
        print("Submission successful!")
        return submission_response.json()
    except requests.exceptions.RequestException as e:
        # Network failures, timeouts and non-2xx responses end up here.
        print(f"An error occurred: {e}")
        return {"error": f"An error occurred: {e}"}
    except Exception as e:
        # Top-level boundary for the UI handler: report instead of crashing.
        print(f"An unexpected error occurred: {e}")
        return {"error": f"An unexpected error occurred: {e}"}

# Build the Gradio UI: two text inputs, a button, and a JSON output area.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Evaluation with smolagent")
    gr.Markdown("Enter your Hugging Face username and the URL of your Space to submit your agent's answers to the GAIA benchmark.")
    # Both inputs are placed inside the same gr.Row().
    with gr.Row():
        hf_username_input = gr.Textbox(label="Hugging Face Username")
        space_url_input = gr.Textbox(label="Hugging Face Space URL")
    run_button = gr.Button("Run Evaluation")
    # Receives whatever run_evaluation() returns.
    output = gr.JSON()

    # Clicking the button calls run_evaluation(hf_username, space_url) with
    # the current textbox values and writes the return value to `output`.
    run_button.click(
        run_evaluation,
        inputs=[hf_username_input, space_url_input],
        outputs=output
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()