Romain Fayoux committed on
Commit
11a8722
·
1 Parent(s): cbdb630

Temporarily changed to the LLM-only agent because the multi-agent model is not available

Browse files
Files changed (1) hide show
  1. app.py +58 -26
app.py CHANGED
@@ -16,27 +16,30 @@ import phoenix as px
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
 
19
  # --- Basic Agent Definition ---
20
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
21
  class BasicAgent:
22
  def __init__(self):
23
  print("BasicAgent initialized.")
 
24
  def __call__(self, question: str) -> str:
25
  print(f"Agent received question (first 50 chars): {question[:50]}...")
26
  fixed_answer = "This is a default answer."
27
  print(f"Agent returning fixed answer: {fixed_answer}")
28
  return fixed_answer
29
 
30
- def run_and_submit_all( profile: gr.OAuthProfile | None, limit: int | None):
 
31
  """
32
  Fetches all questions, runs the BasicAgent on them, submits all answers,
33
  and displays the results.
34
  """
35
  # --- Determine HF Space Runtime URL and Repo URL ---
36
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
37
 
38
  if profile:
39
- username= f"{profile.username}"
40
  print(f"User logged in: {username}")
41
  else:
42
  print("User not logged in.")
@@ -49,7 +52,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, limit: int | None):
49
 
50
  # 1. Instantiate Agent ( modify this part to create your agent)
51
  try:
52
- agent = MultiAgent()
53
  except Exception as e:
54
  print(f"Error instantiating agent: {e}")
55
  return f"Error initializing agent: {e}", None
@@ -64,7 +67,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, limit: int | None):
64
  print("Fetching questions from local file")
65
  with open("data/questions.json", "r") as f:
66
  questions_data = json.load(f)
67
- questions_data = [q for q in questions_data if q['task_id'] in task_ids]
68
  # Otherwise fetch from Hugging Face API
69
  else:
70
  print(f"Fetching questions from: {questions_url}")
@@ -99,20 +102,38 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, limit: int | None):
99
  task_id = item.get("task_id")
100
  file_name = item.get("file_name")
101
  if file_name != "":
102
- file_path = f"{files_url}/{task_id}"
103
- question_text = item.get("question") + "The mentionned file can be downloaded from the following link: " + file_path
 
 
 
 
104
  else:
105
- question_text = item.get("question")
106
  if not task_id or question_text is None:
107
  print(f"Skipping item with missing task_id or question: {item}")
108
  continue
109
  try:
110
  submitted_answer = agent(question_text)
111
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
112
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
113
  except Exception as e:
114
- print(f"Error running agent on task {task_id}: {e}")
115
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
116
 
117
  if not answers_payload:
118
  print("Agent did not produce any answers to submit.")
@@ -136,21 +157,29 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, limit: int | None):
136
  # Log evaluations to Phoenix
137
  log_evaluations_to_phoenix(evaluations_df)
138
 
139
- print(f"Ground truth comparison completed: {summary_stats['exact_matches']}/{summary_stats['total_questions']} exact matches")
 
 
140
 
141
  except Exception as e:
142
  print(f"Error during ground truth comparison: {e}")
143
  summary_stats = {"error": str(e)}
144
 
145
  # 4. Prepare Submission
146
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
147
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
148
 
149
  # Add ground truth comparison to status
150
  if "error" not in summary_stats:
151
  status_update += f"\n\nGround Truth Comparison:\n"
152
  status_update += f"Exact matches: {summary_stats['exact_matches']}/{summary_stats['total_questions']} ({summary_stats['exact_match_rate']:.1%})\n"
153
- status_update += f"Average similarity: {summary_stats['average_similarity']:.3f}\n"
 
 
154
  status_update += f"Contains correct answer: {summary_stats['contains_matches']}/{summary_stats['total_questions']} ({summary_stats['contains_match_rate']:.1%})\n"
155
  status_update += f"Evaluations logged to Phoenix ✅"
156
  else:
@@ -224,17 +253,16 @@ with gr.Blocks() as demo:
224
 
225
  run_button = gr.Button("Run Evaluation & Submit All Answers")
226
 
227
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
 
228
  # Removed max_rows=10 from DataFrame constructor
229
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
230
 
231
- run_button.click(
232
- fn=run_and_submit_all,
233
- outputs=[status_output, results_table]
234
- )
235
 
236
  if __name__ == "__main__":
237
- print("\n" + "-"*30 + " App Starting " + "-"*30)
238
 
239
  # Telemetry
240
  register()
@@ -242,7 +270,7 @@ if __name__ == "__main__":
242
 
243
  # Check for SPACE_HOST and SPACE_ID at startup for information
244
  space_host_startup = os.getenv("SPACE_HOST")
245
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
246
 
247
  if space_host_startup:
248
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -250,14 +278,18 @@ if __name__ == "__main__":
250
  else:
251
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
252
 
253
- if space_id_startup: # Print repo URLs if SPACE_ID is found
254
  print(f"✅ SPACE_ID found: {space_id_startup}")
255
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
256
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
257
  else:
258
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
259
 
260
- print("-"*(60 + len(" App Starting ")) + "\n")
261
 
262
  print("Launching Gradio Interface for Basic Agent Evaluation...")
263
  demo.launch(debug=True, share=False)
 
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+
20
# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    """Minimal placeholder agent: answers every question with a fixed string.

    Serves as a stand-in implementation so the evaluation/submission
    pipeline can run end-to-end before a real agent is plugged in.
    """

    def __init__(self):
        # No state to set up; log so startup is visible in the Space logs.
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Return the canned answer for *question*.

        Args:
            question: The task question text (only a prefix is logged).

        Returns:
            A fixed default answer, independent of the input.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        fixed_answer = "This is a default answer."
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer
31
 
32
+
33
+ def run_and_submit_all(profile: gr.OAuthProfile | None, limit: int | None):
34
  """
35
  Fetches all questions, runs the BasicAgent on them, submits all answers,
36
  and displays the results.
37
  """
38
  # --- Determine HF Space Runtime URL and Repo URL ---
39
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
40
 
41
  if profile:
42
+ username = f"{profile.username}"
43
  print(f"User logged in: {username}")
44
  else:
45
  print("User not logged in.")
 
52
 
53
  # 1. Instantiate Agent ( modify this part to create your agent)
54
  try:
55
+ agent = LLMOnlyAgent()
56
  except Exception as e:
57
  print(f"Error instantiating agent: {e}")
58
  return f"Error initializing agent: {e}", None
 
67
  print("Fetching questions from local file")
68
  with open("data/questions.json", "r") as f:
69
  questions_data = json.load(f)
70
+ questions_data = [q for q in questions_data if q["task_id"] in task_ids]
71
  # Otherwise fetch from Hugging Face API
72
  else:
73
  print(f"Fetching questions from: {questions_url}")
 
102
  task_id = item.get("task_id")
103
  file_name = item.get("file_name")
104
  if file_name != "":
105
+ file_path = f"{files_url}/{task_id}"
106
+ question_text = (
107
+ item.get("question")
108
+ + "The mentionned file can be downloaded from the following link: "
109
+ + file_path
110
+ )
111
  else:
112
+ question_text = item.get("question")
113
  if not task_id or question_text is None:
114
  print(f"Skipping item with missing task_id or question: {item}")
115
  continue
116
  try:
117
  submitted_answer = agent(question_text)
118
+ answers_payload.append(
119
+ {"task_id": task_id, "submitted_answer": submitted_answer}
120
+ )
121
+ results_log.append(
122
+ {
123
+ "Task ID": task_id,
124
+ "Question": question_text,
125
+ "Submitted Answer": submitted_answer,
126
+ }
127
+ )
128
  except Exception as e:
129
+ print(f"Error running agent on task {task_id}: {e}")
130
+ results_log.append(
131
+ {
132
+ "Task ID": task_id,
133
+ "Question": question_text,
134
+ "Submitted Answer": f"AGENT ERROR: {e}",
135
+ }
136
+ )
137
 
138
  if not answers_payload:
139
  print("Agent did not produce any answers to submit.")
 
157
  # Log evaluations to Phoenix
158
  log_evaluations_to_phoenix(evaluations_df)
159
 
160
+ print(
161
+ f"Ground truth comparison completed: {summary_stats['exact_matches']}/{summary_stats['total_questions']} exact matches"
162
+ )
163
 
164
  except Exception as e:
165
  print(f"Error during ground truth comparison: {e}")
166
  summary_stats = {"error": str(e)}
167
 
168
  # 4. Prepare Submission
169
+ submission_data = {
170
+ "username": username.strip(),
171
+ "agent_code": agent_code,
172
+ "answers": answers_payload,
173
+ }
174
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
175
 
176
  # Add ground truth comparison to status
177
  if "error" not in summary_stats:
178
  status_update += f"\n\nGround Truth Comparison:\n"
179
  status_update += f"Exact matches: {summary_stats['exact_matches']}/{summary_stats['total_questions']} ({summary_stats['exact_match_rate']:.1%})\n"
180
+ status_update += (
181
+ f"Average similarity: {summary_stats['average_similarity']:.3f}\n"
182
+ )
183
  status_update += f"Contains correct answer: {summary_stats['contains_matches']}/{summary_stats['total_questions']} ({summary_stats['contains_match_rate']:.1%})\n"
184
  status_update += f"Evaluations logged to Phoenix ✅"
185
  else:
 
253
 
254
  run_button = gr.Button("Run Evaluation & Submit All Answers")
255
 
256
+ status_output = gr.Textbox(
257
+ label="Run Status / Submission Result", lines=5, interactive=False
258
+ )
259
  # Removed max_rows=10 from DataFrame constructor
260
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
261
 
262
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
263
 
264
  if __name__ == "__main__":
265
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
266
 
267
  # Telemetry
268
  register()
 
270
 
271
  # Check for SPACE_HOST and SPACE_ID at startup for information
272
  space_host_startup = os.getenv("SPACE_HOST")
273
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
274
 
275
  if space_host_startup:
276
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
278
  else:
279
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
280
 
281
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
282
  print(f"✅ SPACE_ID found: {space_id_startup}")
283
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
284
+ print(
285
+ f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
286
+ )
287
  else:
288
+ print(
289
+ "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
290
+ )
291
 
292
+ print("-" * (60 + len(" App Starting ")) + "\n")
293
 
294
  print("Launching Gradio Interface for Basic Agent Evaluation...")
295
  demo.launch(debug=True, share=False)