Spaces:
Sleeping
Sleeping
File size: 2,718 Bytes
903af12 8cf27dc 903af12 8cf27dc 903af12 8cf27dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import gradio as gr
import requests
from agent import initialize_agent
# 1. Initialize the agent
# Built once at import time by `initialize_agent` from the local `agent` module.
# `run_evaluation` treats a falsy value here as "initialization failed"
# (presumably `initialize_agent` returns None in that case; confirm in agent.py).
manager_agent = initialize_agent()
# 2. Define the API URL
# Base URL of the evaluation service; `/questions` and `/submit` are appended to it.
API_URL = "https://gaia-benchmark-dev.hf.space"
# 3. Gradio interface
# Upper bound, in seconds, on each HTTP call to the evaluation API so that an
# unresponsive server cannot block the Gradio handler forever.
_REQUEST_TIMEOUT_SECONDS = 60


def run_evaluation(hf_username, space_url):
    """Answer every benchmark question with the agent and submit the results.

    Fetches the question list from ``{API_URL}/questions``, runs
    ``manager_agent`` on each question's ``"question"`` text, and posts the
    collected answers to ``{API_URL}/submit``.

    Args:
        hf_username: Hugging Face username sent with the submission.
        space_url: URL of the Space sent with the submission.

    Returns:
        The decoded JSON body of the submission response on success. On any
        failure, a ``{"error": message}`` dict. A dict is used rather than a
        bare sentence because the result is shown in a ``gr.JSON`` component,
        which treats a ``str`` value as JSON text to parse; a plain error
        sentence is not valid JSON and would not be displayed.
    """
    if not manager_agent:
        return {"error": "Agent could not be initialized. Please check the model and token."}
    if not hf_username or not space_url:
        return {"error": "Please provide your Hugging Face username and Space URL."}

    try:
        print("Fetching questions...")
        response = requests.get(f"{API_URL}/questions", timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        questions = response.json()
        total = len(questions)
        print(f"Found {total} questions.")

        answers = []
        for position, question in enumerate(questions, start=1):
            prompt = question["question"]
            question_id = question["id"]
            print(f"Answering question {position}/{total}: {prompt}")
            try:
                answer = manager_agent.run(prompt)
            except Exception as e:
                # Best effort: one failing question must not abort the whole
                # run, so a placeholder answer is submitted for it instead.
                print(f"Error running agent on question {question_id}: {e}")
                answer = "Error: Could not generate an answer."
            else:
                print(f"Got answer: {answer}")
            answers.append({"question_id": question_id, "answer": answer})

        print("Submitting answers...")
        submission_response = requests.post(
            f"{API_URL}/submit",
            json={
                "hf_username": hf_username,
                "answers": answers,
                "space_url": space_url,
            },
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        submission_response.raise_for_status()
        print("Submission successful!")
        return submission_response.json()
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts, and non-2xx statuses.
        message = f"An error occurred: {e}"
    except Exception as e:
        # UI boundary: report anything else (e.g. an unexpected payload shape)
        # to the user instead of letting the handler crash.
        message = f"An unexpected error occurred: {e}"

    print(message)
    return {"error": message}
# Page layout: title and instructions, the two text inputs, a trigger button,
# and a JSON panel that shows whatever `run_evaluation` returns.
with gr.Blocks() as demo:
    gr.Markdown(value="# GAIA Benchmark Evaluation with smolagent")
    gr.Markdown(
        value="Enter your Hugging Face username and the URL of your Space to submit your agent's answers to the GAIA benchmark."
    )

    # NOTE(review): assumes only the two textboxes share this row; confirm layout.
    with gr.Row():
        hf_username_input = gr.Textbox(label="Hugging Face Username")
        space_url_input = gr.Textbox(label="Hugging Face Space URL")

    run_button = gr.Button(value="Run Evaluation")
    output = gr.JSON()

    # Clicking the button passes both textbox values to `run_evaluation`.
    run_button.click(
        fn=run_evaluation,
        inputs=[hf_username_input, space_url_input],
        outputs=output,
    )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()