kamorou committed
Commit 29e9c5e · verified · Parent: 3175eb4

Update app.py

Files changed (1)
  1. app.py +323 −324
app.py CHANGED
@@ -1,380 +1,379 @@
1
- # import os
2
- # import gradio as gr
3
- # import requests
4
- # import inspect
5
- # import pandas as pd
6
-
7
- # # Add this line with the other imports
8
- # from agent import BasicAgent
9
-
10
- # # (Keep Constants as is)
11
- # # --- Constants ---
12
- # DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
-
14
- # # --- Basic Agent Definition ---
15
- # # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
16
- # class BasicAgent:
17
- # def __init__(self):
18
- # print("BasicAgent initialized.")
19
- # def __call__(self, question: str) -> str:
20
- # print(f"Agent received question (first 50 chars): {question[:50]}...")
21
- # fixed_answer = "This is a default answer."
22
- # print(f"Agent returning fixed answer: {fixed_answer}")
23
- # return fixed_answer
24
-
25
- # def run_and_submit_all( profile: gr.OAuthProfile | None):
26
- # """
27
- # Fetches all questions, runs the BasicAgent on them, submits all answers,
28
- # and displays the results.
29
- # """
30
- # # --- Determine HF Space Runtime URL and Repo URL ---
31
- # space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
32
-
33
- # if profile:
34
- # username= f"{profile.username}"
35
- # print(f"User logged in: {username}")
36
- # else:
37
- # print("User not logged in.")
38
- # return "Please Login to Hugging Face with the button.", None
39
-
40
- # api_url = DEFAULT_API_URL
41
- # questions_url = f"{api_url}/questions"
42
- # submit_url = f"{api_url}/submit"
43
-
44
- # # 1. Instantiate Agent (modify this part to create your agent)
45
- # try:
46
- # agent = BasicAgent()
47
- # except Exception as e:
48
- # print(f"Error instantiating agent: {e}")
49
- # return f"Error initializing agent: {e}", None
50
- # # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
51
- # agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
52
- # print(agent_code)
53
-
54
- # # 2. Fetch Questions
55
- # print(f"Fetching questions from: {questions_url}")
56
- # try:
57
- # response = requests.get(questions_url, timeout=15)
58
- # response.raise_for_status()
59
- # questions_data = response.json()
60
- # if not questions_data:
61
- # print("Fetched questions list is empty.")
62
- # return "Fetched questions list is empty or invalid format.", None
63
- # print(f"Fetched {len(questions_data)} questions.")
64
- # except requests.exceptions.RequestException as e:
65
- # print(f"Error fetching questions: {e}")
66
- # return f"Error fetching questions: {e}", None
67
- # except requests.exceptions.JSONDecodeError as e:
68
- # print(f"Error decoding JSON response from questions endpoint: {e}")
69
- # print(f"Response text: {response.text[:500]}")
70
- # return f"Error decoding server response for questions: {e}", None
71
- # except Exception as e:
72
- # print(f"An unexpected error occurred fetching questions: {e}")
73
- # return f"An unexpected error occurred fetching questions: {e}", None
74
-
75
- # # 3. Run your Agent
76
- # results_log = []
77
- # answers_payload = []
78
- # print(f"Running agent on {len(questions_data)} questions...")
79
- # for item in questions_data:
80
- # task_id = item.get("task_id")
81
- # question_text = item.get("question")
82
- # if not task_id or question_text is None:
83
- # print(f"Skipping item with missing task_id or question: {item}")
84
- # continue
85
- # try:
86
- # submitted_answer = agent(question_text)
87
- # answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
88
- # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
89
- # except Exception as e:
90
- # print(f"Error running agent on task {task_id}: {e}")
91
- # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
92
-
93
- # if not answers_payload:
94
- # print("Agent did not produce any answers to submit.")
95
- # return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
96
-
97
- # # 4. Prepare Submission
98
- # submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
99
- # status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
100
- # print(status_update)
101
-
102
- # # 5. Submit
103
- # print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
104
- # try:
105
- # response = requests.post(submit_url, json=submission_data, timeout=60)
106
- # response.raise_for_status()
107
- # result_data = response.json()
108
- # final_status = (
109
- # f"Submission Successful!\n"
110
- # f"User: {result_data.get('username')}\n"
111
- # f"Overall Score: {result_data.get('score', 'N/A')}% "
112
- # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
113
- # f"Message: {result_data.get('message', 'No message received.')}"
114
- # )
115
- # print("Submission successful.")
116
- # results_df = pd.DataFrame(results_log)
117
- # return final_status, results_df
118
- # except requests.exceptions.HTTPError as e:
119
- # error_detail = f"Server responded with status {e.response.status_code}."
120
- # try:
121
- # error_json = e.response.json()
122
- # error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
123
- # except requests.exceptions.JSONDecodeError:
124
- # error_detail += f" Response: {e.response.text[:500]}"
125
- # status_message = f"Submission Failed: {error_detail}"
126
- # print(status_message)
127
- # results_df = pd.DataFrame(results_log)
128
- # return status_message, results_df
129
- # except requests.exceptions.Timeout:
130
- # status_message = "Submission Failed: The request timed out."
131
- # print(status_message)
132
- # results_df = pd.DataFrame(results_log)
133
- # return status_message, results_df
134
- # except requests.exceptions.RequestException as e:
135
- # status_message = f"Submission Failed: Network error - {e}"
136
- # print(status_message)
137
- # results_df = pd.DataFrame(results_log)
138
- # return status_message, results_df
139
- # except Exception as e:
140
- # status_message = f"An unexpected error occurred during submission: {e}"
141
- # print(status_message)
142
- # results_df = pd.DataFrame(results_log)
143
- # return status_message, results_df
144
-
145
-
146
- # # --- Build Gradio Interface using Blocks ---
147
- # with gr.Blocks() as demo:
148
- # gr.Markdown("# Basic Agent Evaluation Runner")
149
- # gr.Markdown(
150
- # """
151
- # **Instructions:**
152
-
153
- # 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
154
- # 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
155
- # 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
156
-
157
- # ---
158
- # **Disclaimers:**
159
- # Once you click the submit button, it can take quite some time (this is the time the agent needs to go through all the questions).
160
- # This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long wait on the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
161
- # """
162
- # )
163
-
164
- # gr.LoginButton()
165
-
166
- # run_button = gr.Button("Run Evaluation & Submit All Answers")
167
-
168
- # status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
169
- # # Removed max_rows=10 from DataFrame constructor
170
- # results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
171
-
172
- # run_button.click(
173
- # fn=run_and_submit_all,
174
- # outputs=[status_output, results_table]
175
- # )
176
-
177
- # if __name__ == "__main__":
178
- # print("\n" + "-"*30 + " App Starting " + "-"*30)
179
- # # Check for SPACE_HOST and SPACE_ID at startup for information
180
- # space_host_startup = os.getenv("SPACE_HOST")
181
- # space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
182
-
183
- # if space_host_startup:
184
- # print(f"✅ SPACE_HOST found: {space_host_startup}")
185
- # print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
186
- # else:
187
- # print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
188
-
189
- # if space_id_startup: # Print repo URLs if SPACE_ID is found
190
- # print(f"✅ SPACE_ID found: {space_id_startup}")
191
- # print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
192
- # print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
193
- # else:
194
- # print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
195
-
196
- # print("-"*(60 + len(" App Starting ")) + "\n")
197
-
198
- # print("Launching Gradio Interface for Basic Agent Evaluation...")
199
- # demo.launch(debug=True, share=False)
200
-
201
-
202
  import os
203
  import gradio as gr
204
  import requests
205
  import inspect
206
  import pandas as pd
207
- from dotenv import load_dotenv
208
- from typing import TypedDict, Annotated, List
209
-
210
- # ==============================================================================
211
- # PART 1: YOUR AGENT'S LOGIC AND DEFINITION
212
- # ==============================================================================
213
-
214
- # LangChain and LangGraph imports
215
- from langchain_huggingface import HuggingFaceEndpoint
216
- # NEW: Import TavilySearch from the new package
217
- from langchain_tavily import TavilySearch
218
- from langchain_experimental.tools import PythonREPLTool
219
- from langchain_core.messages import BaseMessage, HumanMessage
220
- from langgraph.graph import StateGraph, END
221
- from langgraph.prebuilt import ToolNode
222
- # NEW: Import the compatible agent constructor and prompt hub
223
- from langchain.agents import create_tool_calling_agent
224
- from langchain import hub
225
-
226
-
227
- # Load API keys from .env file or Space secrets
228
- load_dotenv()
229
- hf_token = os.getenv("HF_TOKEN")
230
- tavily_api_key = os.getenv("TAVILY_API_KEY")
231
-
232
- if tavily_api_key:
233
- os.environ["TAVILY_API_KEY"] = tavily_api_key
234
- else:
235
- print("Warning: TAVILY_API_KEY not found. Web search tool will not work.")
236
-
237
- # --- Define Agent Tools ---
238
- # NEW: Using TavilySearch from the correct package
239
- tools = [
240
- TavilySearch(max_results=3, description="A search engine for finding up-to-date information on the web."),
241
- PythonREPLTool()
242
- ]
243
- tool_node = ToolNode(tools)
244
-
245
- # --- Configure the LLM "Brain" ---
246
- repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
247
-
248
- llm = HuggingFaceEndpoint(
249
- repo_id=repo_id,
250
- huggingfacehub_api_token=hf_token,
251
- temperature=0,
252
- max_new_tokens=2048,
253
- )
254
-
255
- # --- THE FIX: Create Agent with a Compatible Method ---
256
- # REMOVED: llm_with_tools = llm.bind_tools(tools)
257
- # This was causing the error.
258
-
259
- # NEW: We pull a pre-made prompt that knows how to handle tool calls.
260
- prompt = hub.pull("hwchase17/react-json")
261
-
262
- # NEW: We use `create_tool_calling_agent`. This function correctly combines the LLM,
263
- # the tools, and the prompt, without needing the .bind_tools() method.
264
- agent_runnable = create_tool_calling_agent(llm, tools, prompt)
265
-
266
-
267
- # --- Build the LangGraph Agent ---
268
- class AgentState(TypedDict):
269
- # The 'messages' key is no longer used, 'input' and 'agent_outcome' are standard for this agent type
270
- input: str
271
- chat_history: list[BaseMessage]
272
- agent_outcome: dict
273
-
274
- # NEW: The agent_node is much simpler now. It just calls the runnable we created.
275
- def agent_node(state):
276
- outcome = agent_runnable.invoke(state)
277
- return {"agent_outcome": outcome}
278
-
279
- def tool_node_executor(state):
280
- # The agent_runnable provides tool calls in a specific format. We execute them.
281
- tool_calls = state["agent_outcome"].tool_calls
282
- tool_outputs = []
283
- for tool_call in tool_calls:
284
- tool_name = tool_call["name"]
285
- tool_to_call = {tool.name: tool for tool in tools}[tool_name]
286
- tool_output = tool_to_call.invoke(tool_call["args"])
287
- tool_outputs.append({"output": tool_output, "tool_call_id": tool_call["id"]})
288
- return {"intermediate_steps": tool_outputs}
289
-
290
-
291
- # This setup is more complex but correctly models the ReAct loop in LangGraph
292
- class BasicAgent:
293
- def __init__(self):
294
- if not hf_token or not tavily_api_key:
295
- raise ValueError("HF_TOKEN or TAVILY_API_KEY not set. Please add them to your Space secrets.")
296
- print("LangGraph Agent initialized successfully.")
297
- # We need an agent executor to run the loop
298
- from langchain.agents import AgentExecutor
299
- self.agent_executor = AgentExecutor(agent=agent_runnable, tools=tools, verbose=True)
300
 
301
- def __call__(self, question: str) -> str:
302
- print(f"Agent received question (first 80 chars): {question[:80]}...")
303
- try:
304
- # The AgentExecutor expects a dictionary with an "input" key.
305
- response = self.agent_executor.invoke({"input": question})
306
- final_answer = response.get("output", "Agent did not produce an output.")
307
- print(f"Agent returning final answer (first 80 chars): {final_answer[:80]}...")
308
- return final_answer
309
- except Exception as e:
310
- print(f"An error occurred in agent execution: {e}")
311
- return f"Error: {e}"
312
 
313
 
314
- # ==============================================================================
315
- # PART 2: THE GRADIO TEST HARNESS UI (UNCHANGED)
316
- # ==============================================================================
317
  # --- Constants ---
318
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
319
 
320
- def run_and_submit_all(profile: gr.OAuthProfile | None):
321
- # This entire function remains the same as the template
322
- space_id = os.getenv("SPACE_ID")
323
  if profile:
324
  username= f"{profile.username}"
325
  print(f"User logged in: {username}")
326
  else:
327
  print("User not logged in.")
328
  return "Please Login to Hugging Face with the button.", None
 
329
  api_url = DEFAULT_API_URL
330
  questions_url = f"{api_url}/questions"
331
  submit_url = f"{api_url}/submit"
 
 
332
  try:
333
  agent = BasicAgent()
334
  except Exception as e:
335
  print(f"Error instantiating agent: {e}")
336
  return f"Error initializing agent: {e}", None
 
337
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
338
  print(f"Fetching questions from: {questions_url}")
339
  try:
340
  response = requests.get(questions_url, timeout=15)
341
  response.raise_for_status()
342
  questions_data = response.json()
343
  print(f"Fetched {len(questions_data)} questions.")
344
  except Exception as e:
 
345
  return f"An unexpected error occurred fetching questions: {e}", None
346
- results_log, answers_payload = [], []
347
  print(f"Running agent on {len(questions_data)} questions...")
348
  for item in questions_data:
349
- task_id, question_text = item.get("task_id"), item.get("question")
350
- if not task_id or question_text is None: continue
351
  try:
352
  submitted_answer = agent(question_text)
353
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
354
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
355
  except Exception as e:
356
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
357
- if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
358
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
359
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
360
  try:
361
  response = requests.post(submit_url, json=submission_data, timeout=60)
362
  response.raise_for_status()
363
  result_data = response.json()
364
- final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\nOverall Score: {result_data.get('score', 'N/A')}% ({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\nMessage: {result_data.get('message', '')}")
365
- return final_status, pd.DataFrame(results_log)
366
  except Exception as e:
367
- return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
368
 
369
- # --- Gradio Interface (Unchanged) ---
370
  with gr.Blocks() as demo:
371
- gr.Markdown("# GAIA Agent Evaluation Runner")
372
- gr.Markdown("1. Log in. 2. Click 'Run Evaluation'.")
373
  gr.LoginButton()
 
374
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
375
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
376
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
377
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
378
 
379
  if __name__ == "__main__":
380
- demo.launch(debug=True, share=False)
1
  import os
2
  import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
7
 
8
 
9
+ # (Keep Constants as is)
 
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
+ # --- Basic Agent Definition ---
14
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
15
+ class BasicAgent:
16
+ def __init__(self):
17
+ print("BasicAgent initialized.")
18
+ def __call__(self, question: str) -> str:
19
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
20
+ fixed_answer = "This is a default answer."
21
+ print(f"Agent returning fixed answer: {fixed_answer}")
22
+ return fixed_answer
23
+
24
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
25
+ """
26
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
27
+ and displays the results.
28
+ """
29
+ # --- Determine HF Space Runtime URL and Repo URL ---
30
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
31
+
32
  if profile:
33
  username= f"{profile.username}"
34
  print(f"User logged in: {username}")
35
  else:
36
  print("User not logged in.")
37
  return "Please Login to Hugging Face with the button.", None
38
+
39
  api_url = DEFAULT_API_URL
40
  questions_url = f"{api_url}/questions"
41
  submit_url = f"{api_url}/submit"
42
+
43
+ # 1. Instantiate Agent (modify this part to create your agent)
44
  try:
45
  agent = BasicAgent()
46
  except Exception as e:
47
  print(f"Error instantiating agent: {e}")
48
  return f"Error initializing agent: {e}", None
49
+ # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
50
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
51
+ print(agent_code)
52
+
53
+ # 2. Fetch Questions
54
  print(f"Fetching questions from: {questions_url}")
55
  try:
56
  response = requests.get(questions_url, timeout=15)
57
  response.raise_for_status()
58
  questions_data = response.json()
59
+ if not questions_data:
60
+ print("Fetched questions list is empty.")
61
+ return "Fetched questions list is empty or invalid format.", None
62
  print(f"Fetched {len(questions_data)} questions.")
63
+ except requests.exceptions.RequestException as e:
64
+ print(f"Error fetching questions: {e}")
65
+ return f"Error fetching questions: {e}", None
66
+ except requests.exceptions.JSONDecodeError as e:
67
+ print(f"Error decoding JSON response from questions endpoint: {e}")
68
+ print(f"Response text: {response.text[:500]}")
69
+ return f"Error decoding server response for questions: {e}", None
70
  except Exception as e:
71
+ print(f"An unexpected error occurred fetching questions: {e}")
72
  return f"An unexpected error occurred fetching questions: {e}", None
73
+
74
+ # 3. Run your Agent
75
+ results_log = []
76
+ answers_payload = []
77
  print(f"Running agent on {len(questions_data)} questions...")
78
  for item in questions_data:
79
+ task_id = item.get("task_id")
80
+ question_text = item.get("question")
81
+ if not task_id or question_text is None:
82
+ print(f"Skipping item with missing task_id or question: {item}")
83
+ continue
84
  try:
85
  submitted_answer = agent(question_text)
86
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
87
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
88
  except Exception as e:
89
+ print(f"Error running agent on task {task_id}: {e}")
90
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
91
+
92
+ if not answers_payload:
93
+ print("Agent did not produce any answers to submit.")
94
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
95
+
96
+ # 4. Prepare Submission
97
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
98
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
99
+ print(status_update)
100
+
101
+ # 5. Submit
102
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
103
  try:
104
  response = requests.post(submit_url, json=submission_data, timeout=60)
105
  response.raise_for_status()
106
  result_data = response.json()
107
+ final_status = (
108
+ f"Submission Successful!\n"
109
+ f"User: {result_data.get('username')}\n"
110
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
111
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
112
+ f"Message: {result_data.get('message', 'No message received.')}"
113
+ )
114
+ print("Submission successful.")
115
+ results_df = pd.DataFrame(results_log)
116
+ return final_status, results_df
117
+ except requests.exceptions.HTTPError as e:
118
+ error_detail = f"Server responded with status {e.response.status_code}."
119
+ try:
120
+ error_json = e.response.json()
121
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
122
+ except requests.exceptions.JSONDecodeError:
123
+ error_detail += f" Response: {e.response.text[:500]}"
124
+ status_message = f"Submission Failed: {error_detail}"
125
+ print(status_message)
126
+ results_df = pd.DataFrame(results_log)
127
+ return status_message, results_df
128
+ except requests.exceptions.Timeout:
129
+ status_message = "Submission Failed: The request timed out."
130
+ print(status_message)
131
+ results_df = pd.DataFrame(results_log)
132
+ return status_message, results_df
133
+ except requests.exceptions.RequestException as e:
134
+ status_message = f"Submission Failed: Network error - {e}"
135
+ print(status_message)
136
+ results_df = pd.DataFrame(results_log)
137
+ return status_message, results_df
138
  except Exception as e:
139
+ status_message = f"An unexpected error occurred during submission: {e}"
140
+ print(status_message)
141
+ results_df = pd.DataFrame(results_log)
142
+ return status_message, results_df
143
+
144
 
145
+ # --- Build Gradio Interface using Blocks ---
146
  with gr.Blocks() as demo:
147
+ gr.Markdown("# Basic Agent Evaluation Runner")
148
+ gr.Markdown(
149
+ """
150
+ **Instructions:**
151
+
152
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
153
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
154
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
155
+
156
+ ---
157
+ **Disclaimers:**
158
+ Once you click the submit button, it can take quite some time (this is the time the agent needs to go through all the questions).
159
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long wait on the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
160
+ """
161
+ )
162
+
163
  gr.LoginButton()
164
+
165
  run_button = gr.Button("Run Evaluation & Submit All Answers")
166
+
167
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
168
+ # Removed max_rows=10 from DataFrame constructor
169
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
170
+
171
+ run_button.click(
172
+ fn=run_and_submit_all,
173
+ outputs=[status_output, results_table]
174
+ )
175
 
176
  if __name__ == "__main__":
177
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
178
+ # Check for SPACE_HOST and SPACE_ID at startup for information
179
+ space_host_startup = os.getenv("SPACE_HOST")
180
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
181
+
182
+ if space_host_startup:
183
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
184
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
185
+ else:
186
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
187
+
188
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
189
+ print(f"✅ SPACE_ID found: {space_id_startup}")
190
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
191
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
192
+ else:
193
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
194
+
195
+ print("-"*(60 + len(" App Starting ")) + "\n")
196
+
197
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
198
+ demo.launch(debug=True, share=False)
199
+
200
+ ###########################################
201
+ # import os
202
+ # import gradio as gr
203
+ # import requests
204
+ # import inspect
205
+ # import pandas as pd
206
+ # from dotenv import load_dotenv
207
+ # from typing import TypedDict, Annotated, List
208
+
209
+ # # ==============================================================================
210
+ # # PART 1: YOUR AGENT'S LOGIC AND DEFINITION
211
+ # # ==============================================================================
212
+
213
+ # # LangChain and LangGraph imports
214
+ # from langchain_huggingface import HuggingFaceEndpoint
215
+ # # NEW: Import TavilySearch from the new package
216
+ # from langchain_tavily import TavilySearch
217
+ # from langchain_experimental.tools import PythonREPLTool
218
+ # from langchain_core.messages import BaseMessage, HumanMessage
219
+ # from langgraph.graph import StateGraph, END
220
+ # from langgraph.prebuilt import ToolNode
221
+ # # NEW: Import the compatible agent constructor and prompt hub
222
+ # from langchain.agents import create_tool_calling_agent
223
+ # from langchain import hub
224
+
225
+
226
+ # # Load API keys from .env file or Space secrets
227
+ # load_dotenv()
228
+ # hf_token = os.getenv("HF_TOKEN")
229
+ # tavily_api_key = os.getenv("TAVILY_API_KEY")
230
+
231
+ # if tavily_api_key:
232
+ # os.environ["TAVILY_API_KEY"] = tavily_api_key
233
+ # else:
234
+ # print("Warning: TAVILY_API_KEY not found. Web search tool will not work.")
235
+
236
+ # # --- Define Agent Tools ---
237
+ # # NEW: Using TavilySearch from the correct package
238
+ # tools = [
239
+ # TavilySearch(max_results=3, description="A search engine for finding up-to-date information on the web."),
240
+ # PythonREPLTool()
241
+ # ]
242
+ # tool_node = ToolNode(tools)
243
+
244
+ # # --- Configure the LLM "Brain" ---
245
+ # repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
246
+
247
+ # llm = HuggingFaceEndpoint(
248
+ # repo_id=repo_id,
249
+ # huggingfacehub_api_token=hf_token,
250
+ # temperature=0,
251
+ # max_new_tokens=2048,
252
+ # )
253
+
254
+ # # --- THE FIX: Create Agent with a Compatible Method ---
255
+ # # REMOVED: llm_with_tools = llm.bind_tools(tools)
256
+ # # This was causing the error.
257
+
258
+ # # NEW: We pull a pre-made prompt that knows how to handle tool calls.
259
+ # prompt = hub.pull("hwchase17/react-json")
260
+
261
+ # # NEW: We use `create_tool_calling_agent`. This function correctly combines the LLM,
262
+ # # the tools, and the prompt, without needing the .bind_tools() method.
263
+ # agent_runnable = create_tool_calling_agent(llm, tools, prompt)
264
+
265
+
266
+ # # --- Build the LangGraph Agent ---
267
+ # class AgentState(TypedDict):
268
+ # # The 'messages' key is no longer used, 'input' and 'agent_outcome' are standard for this agent type
269
+ # input: str
270
+ # chat_history: list[BaseMessage]
271
+ # agent_outcome: dict
272
+
273
+ # # NEW: The agent_node is much simpler now. It just calls the runnable we created.
274
+ # def agent_node(state):
275
+ # outcome = agent_runnable.invoke(state)
276
+ # return {"agent_outcome": outcome}
277
+
278
+ # def tool_node_executor(state):
279
+ # # The agent_runnable provides tool calls in a specific format. We execute them.
280
+ # tool_calls = state["agent_outcome"].tool_calls
281
+ # tool_outputs = []
282
+ # for tool_call in tool_calls:
283
+ # tool_name = tool_call["name"]
284
+ # tool_to_call = {tool.name: tool for tool in tools}[tool_name]
285
+ # tool_output = tool_to_call.invoke(tool_call["args"])
286
+ # tool_outputs.append({"output": tool_output, "tool_call_id": tool_call["id"]})
287
+ # return {"intermediate_steps": tool_outputs}
288
+
289
+
290
+ # # This setup is more complex but correctly models the ReAct loop in LangGraph
291
+ # class BasicAgent:
292
+ # def __init__(self):
293
+ # if not hf_token or not tavily_api_key:
294
+ # raise ValueError("HF_TOKEN or TAVILY_API_KEY not set. Please add them to your Space secrets.")
295
+ # print("LangGraph Agent initialized successfully.")
296
+ # # We need an agent executor to run the loop
297
+ # from langchain.agents import AgentExecutor
298
+ # self.agent_executor = AgentExecutor(agent=agent_runnable, tools=tools, verbose=True)
299
+
300
+ # def __call__(self, question: str) -> str:
301
+ # print(f"Agent received question (first 80 chars): {question[:80]}...")
302
+ # try:
303
+ # # The AgentExecutor expects a dictionary with an "input" key.
304
+ # response = self.agent_executor.invoke({"input": question})
305
+ # final_answer = response.get("output", "Agent did not produce an output.")
306
+ # print(f"Agent returning final answer (first 80 chars): {final_answer[:80]}...")
307
+ # return final_answer
308
+ # except Exception as e:
309
+ # print(f"An error occurred in agent execution: {e}")
310
+ # return f"Error: {e}"
311
+
312
+
313
+ # # ==============================================================================
314
+ # # PART 2: THE GRADIO TEST HARNESS UI (UNCHANGED)
315
+ # # ==============================================================================
316
+ # # --- Constants ---
317
+ # DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
318
+
319
+ # def run_and_submit_all(profile: gr.OAuthProfile | None):
320
+ # # This entire function remains the same as the template
321
+ # space_id = os.getenv("SPACE_ID")
322
+ # if profile:
323
+ # username= f"{profile.username}"
324
+ # print(f"User logged in: {username}")
325
+ # else:
326
+ # print("User not logged in.")
327
+ # return "Please Login to Hugging Face with the button.", None
328
+ # api_url = DEFAULT_API_URL
329
+ # questions_url = f"{api_url}/questions"
330
+ # submit_url = f"{api_url}/submit"
331
+ # try:
332
+ # agent = BasicAgent()
333
+ # except Exception as e:
334
+ # print(f"Error instantiating agent: {e}")
335
+ # return f"Error initializing agent: {e}", None
336
+ # agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
337
+ # print(f"Fetching questions from: {questions_url}")
338
+ # try:
339
+ # response = requests.get(questions_url, timeout=15)
340
+ # response.raise_for_status()
341
+ # questions_data = response.json()
342
+ # print(f"Fetched {len(questions_data)} questions.")
343
+ # except Exception as e:
344
+ # return f"An unexpected error occurred fetching questions: {e}", None
345
+ # results_log, answers_payload = [], []
346
+ # print(f"Running agent on {len(questions_data)} questions...")
347
+ # for item in questions_data:
348
+ # task_id, question_text = item.get("task_id"), item.get("question")
349
+ # if not task_id or question_text is None: continue
350
+ # try:
351
+ # submitted_answer = agent(question_text)
352
+ # answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
353
+ # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
354
+ # except Exception as e:
355
+ # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
356
+ # if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
357
+ # submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
358
+ # print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
359
+ # try:
360
+ # response = requests.post(submit_url, json=submission_data, timeout=60)
361
+ # response.raise_for_status()
362
+ # result_data = response.json()
363
+ # final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\nOverall Score: {result_data.get('score', 'N/A')}% ({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\nMessage: {result_data.get('message', '')}")
364
+ # return final_status, pd.DataFrame(results_log)
365
+ # except Exception as e:
366
+ # return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
367
+
368
+ # # --- Gradio Interface (Unchanged) ---
369
+ # with gr.Blocks() as demo:
370
+ # gr.Markdown("# GAIA Agent Evaluation Runner")
371
+ # gr.Markdown("1. Log in. 2. Click 'Run Evaluation'.")
372
+ # gr.LoginButton()
373
+ # run_button = gr.Button("Run Evaluation & Submit All Answers")
374
+ # status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
375
+ # results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
376
+ # run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
377
+
378
+ # if __name__ == "__main__":
379
+ # demo.launch(debug=True, share=False)
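
The disclaimer in the diff above suggests caching the answers and submitting them in a separate action, or answering the questions asynchronously, rather than doing everything inside a single button click. Below is a minimal sketch of that idea, assuming the same /questions and /submit endpoints and the BasicAgent __call__(question) -> str interface shown in the diff; the names CACHE_PATH, answer_all, cache_answers, and submit_cached are illustrative helpers, not part of this commit.

import asyncio
import json

import requests

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
CACHE_PATH = "answers_cache.json"  # hypothetical local cache file, not from the commit


async def answer_all(agent, questions: list) -> list:
    """Run a synchronous agent over all questions concurrently via a thread pool."""
    loop = asyncio.get_running_loop()

    async def answer_one(item):
        # run_in_executor keeps the blocking agent call off the event loop
        answer = await loop.run_in_executor(None, agent, item["question"])
        return {"task_id": item["task_id"], "submitted_answer": answer}

    return list(await asyncio.gather(*(answer_one(q) for q in questions)))


def cache_answers(payload: list, path: str = CACHE_PATH) -> None:
    """Persist the answers so submission can happen later, in a separate action."""
    with open(path, "w") as f:
        json.dump(payload, f)


def submit_cached(username: str, agent_code: str, path: str = CACHE_PATH) -> dict:
    """Submit previously cached answers to the scoring endpoint."""
    with open(path) as f:
        answers = json.load(f)
    submission = {"username": username, "agent_code": agent_code, "answers": answers}
    resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
    resp.raise_for_status()
    return resp.json()


# Example usage (assuming `agent = BasicAgent()` and `questions` fetched from /questions):
#   payload = asyncio.run(answer_all(agent, questions))
#   cache_answers(payload)
#   ... later, as a separate action ...
#   submit_cached(username, agent_code)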