Update main.py
main.py
CHANGED
@@ -25,95 +25,139 @@ step_threshold = 5
Previous version (lines 25-119; removed lines are prefixed with "-", some truncated in the source):

questions_for_api: List[Dict[str, Any]] = []
ground_truth_answers: Dict[str, str] = {}
filtered_dataset = None

# --- Define ErrorResponse if not already defined ---
class ErrorResponse(BaseModel):
    detail: str


def load_questions():
    global filtered_dataset
    global questions_for_api
    global ground_truth_answers
-   global task_file_paths

    tempo_filtered = []
-   # Clear existing data
    questions_for_api.clear()
    ground_truth_answers.clear()
-   task_file_paths.clear() # Clear the mapping

    logger.info("Starting to load and filter GAIA dataset (validation split)...")
    try:
        dataset = load_dataset("gaia-benchmark/GAIA", "2023_level1", split="validation", trust_remote_code=True)
        logger.info(f"GAIA dataset validation split loaded. Features: {dataset.features}")
    except Exception as e:
        logger.error(f"Failed to load GAIA dataset: {e}", exc_info=True)
        raise RuntimeError("Could not load the primary GAIA dataset.") from e

-   # --- Filtering Logic
-   # [ ... Same filtering code as before ... ]
    for item in dataset:
        metadata = item.get('Annotator Metadata')
-
            num_tools_str = metadata.get('Number of tools')
            num_steps_str = metadata.get('Number of steps')

            if num_tools_str is not None and num_steps_str is not None:
                try:
                    num_tools = int(num_tools_str)
                    num_steps = int(num_steps_str)
                    if num_tools < tool_threshold and num_steps < step_threshold:
-                       tempo_filtered.append(item)
                except ValueError:
-
-
-
-

-   filtered_dataset = tempo_filtered
-   logger.info(f"Found {len(filtered_dataset)} questions matching the criteria.")

    processed_count = 0
-   # ---
    for item in filtered_dataset:
        task_id = item.get('task_id')
        original_question_text = item.get('Question')
        final_answer = item.get('Final answer')
-       local_file_path = item.get('file_path') #
-       file_name = item.get('file_name')

-       # Validate essential fields
        if task_id and original_question_text and final_answer is not None:
-
            processed_item = {
                "task_id": str(task_id),
-               "question": str(original_question_text),
                "Level": item.get("Level"),
-               "file_name": file_name, # Include filename for info
            }
-           #
            processed_item = {k: v for k, v in processed_item.items() if v is not None}

            questions_for_api.append(processed_item)

-           # Store ground truth
            ground_truth_answers[str(task_id)] = str(final_answer)

-           #
-           if local_file_path and file_name:
-               #
-               if os.path.
-
-
                else:
-


            processed_count += 1
        else:
-

    logger.info(f"Successfully processed {processed_count} questions for the API.")
    logger.info(f"Stored file path mappings for {len(task_file_paths)} tasks.")
    if not questions_for_api:
-       logger.error("CRITICAL: No valid questions loaded after filtering
Updated version (lines 25-163; added lines are prefixed with "+"):

questions_for_api: List[Dict[str, Any]] = []
ground_truth_answers: Dict[str, str] = {}
filtered_dataset = None
+
+ALLOWED_CACHE_BASE = os.path.abspath("/app/.cache")
+
# --- Define ErrorResponse if not already defined ---
class ErrorResponse(BaseModel):
    detail: str

+
def load_questions():
+   """
+   Loads the GAIA dataset, filters questions based on tool/step counts,
+   populates 'questions_for_api' with data for the API (excluding sensitive/internal fields),
+   stores ground truth answers, and maps task IDs to their local file paths on the server.
+   """
    global filtered_dataset
    global questions_for_api
    global ground_truth_answers
+   global task_file_paths # Declare modification of global
+
    tempo_filtered = []
+   # Clear existing data from previous runs or restarts
    questions_for_api.clear()
    ground_truth_answers.clear()
+   task_file_paths.clear() # Clear the file path mapping

    logger.info("Starting to load and filter GAIA dataset (validation split)...")
    try:
+       # Load the specified split
        dataset = load_dataset("gaia-benchmark/GAIA", "2023_level1", split="validation", trust_remote_code=True)
        logger.info(f"GAIA dataset validation split loaded. Features: {dataset.features}")
    except Exception as e:
        logger.error(f"Failed to load GAIA dataset: {e}", exc_info=True)
+       # Depending on requirements, you might want to exit or raise a more specific error
        raise RuntimeError("Could not load the primary GAIA dataset.") from e

+   # --- Filtering Logic based on Annotator Metadata ---
    for item in dataset:
        metadata = item.get('Annotator Metadata')
+
+       if metadata:
            num_tools_str = metadata.get('Number of tools')
            num_steps_str = metadata.get('Number of steps')
+
            if num_tools_str is not None and num_steps_str is not None:
                try:
                    num_tools = int(num_tools_str)
                    num_steps = int(num_steps_str)
+                   # Apply filter conditions
                    if num_tools < tool_threshold and num_steps < step_threshold:
+                       tempo_filtered.append(item) # Add the original item if it matches filter
                except ValueError:
+                   logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - Could not convert tool/step count in metadata: tools='{num_tools_str}', steps='{num_steps_str}'.")
+           else:
+               logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - 'Number of tools' or 'Number of steps' missing in Metadata.")
+       else:
+           # If metadata is essential for filtering, you might want to skip items without it
+           logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - Missing 'Annotator Metadata'.")

+   filtered_dataset = tempo_filtered # Store the list of filtered original dataset items
+   logger.info(f"Found {len(filtered_dataset)} questions matching the criteria (tools < {tool_threshold}, steps < {step_threshold}).")

    processed_count = 0
+   # --- Process filtered items for API and File Mapping ---
    for item in filtered_dataset:
+       # Extract data from the dataset item
        task_id = item.get('task_id')
        original_question_text = item.get('Question')
        final_answer = item.get('Final answer')
+       local_file_path = item.get('file_path') # Server-local path from dataset
+       file_name = item.get('file_name') # Filename from dataset

+       # Validate essential fields needed for processing & ground truth
+       # Note: We proceed even if file path/name are missing, just won't map the file.
        if task_id and original_question_text and final_answer is not None:
+
+           # 1. Create the dictionary to be exposed via the API
+           # (Includes 'file_name' for info, but excludes 'file_path')
            processed_item = {
                "task_id": str(task_id),
+               "question": str(original_question_text), # Rename 'Question' -> 'question'
+               # Include other desired fields, using .get() for safety
                "Level": item.get("Level"),
+               "file_name": file_name, # Include filename for client info
            }
+           # Optional: Remove keys with None values if you prefer cleaner JSON
            processed_item = {k: v for k, v in processed_item.items() if v is not None}

            questions_for_api.append(processed_item)

+           # 2. Store the ground truth answer separately
            ground_truth_answers[str(task_id)] = str(final_answer)

+           # 3. Store the file path mapping if file details exist and are valid
+           if local_file_path and file_name:
+               # Log if the path from the dataset isn't absolute (might indicate issues)
+               if not os.path.isabs(local_file_path):
+                   logger.warning(f"Task {task_id}: Path '{local_file_path}' from dataset is not absolute. This might cause issues finding the file on the server.")
+                   # Depending on dataset guarantees, you might try making it absolute:
+                   # Assuming WORKDIR is /app as per Dockerfile if paths are relative
+                   # local_file_path = os.path.abspath(os.path.join("/app", local_file_path))
+
+               # Check if the file actually exists at the path ON THE SERVER
+               if os.path.exists(local_file_path) and os.path.isfile(local_file_path):
+                   # Path exists, store the mapping
+                   task_file_paths[str(task_id)] = local_file_path
+                   logger.debug(f"Stored file path mapping for task_id {task_id}: {local_file_path}")
                else:
+                   # Path does *not* exist or is not a file on server filesystem
+                   logger.warning(f"File path '{local_file_path}' for task_id {task_id} does NOT exist or is not a file on server. Mapping skipped.")
+           # Log if file info was missing in the first place
+           elif task_id: # Log only if we have a task_id to reference
+               # Check which specific part was missing for better debugging
+               if not local_file_path and not file_name:
+                   logger.debug(f"Task {task_id}: No 'file_path' or 'file_name' found in dataset item. No file mapping stored.")
+               elif not local_file_path:
+                   logger.debug(f"Task {task_id}: 'file_path' is missing in dataset item (file_name: '{file_name}'). No file mapping stored.")
+               else: # Not file_name
+                   logger.debug(f"Task {task_id}: 'file_name' is missing in dataset item (file_path: '{local_file_path}'). No file mapping stored.")


            processed_count += 1
        else:
+           # Log skipping due to missing core fields (task_id, Question, Final answer)
+           logger.warning(f"Skipping item processing due to missing essential fields: task_id={task_id}, has_question={original_question_text is not None}, has_answer={final_answer is not None}")

+   # Final summary logging
    logger.info(f"Successfully processed {processed_count} questions for the API.")
    logger.info(f"Stored file path mappings for {len(task_file_paths)} tasks.")
+
    if not questions_for_api:
+       logger.error("CRITICAL: No valid questions were loaded after filtering and processing. API endpoints like /questions will fail.")
+       # Consider raising an error if the application cannot function without questions
+       # raise RuntimeError("Failed to load mandatory question data after filtering.")
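The updated hunk defines ALLOWED_CACHE_BASE but never references it in the lines shown, so its role has to come from elsewhere in main.py. A common pattern is to confine any file looked up through task_file_paths to that cache directory before serving it to clients. The following is only a minimal sketch of that idea, assuming a FastAPI app (the diff's ErrorResponse is a pydantic model, so FastAPI is plausible but not confirmed); the app object, the /files/{task_id} route name, and the way task_file_paths is shared are all assumptions, not the Space's actual code.

import os

from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse

app = FastAPI()

ALLOWED_CACHE_BASE = os.path.abspath("/app/.cache")  # same constant as in the diff
task_file_paths: dict = {}  # in main.py this mapping is populated by load_questions()

@app.get("/files/{task_id}")  # hypothetical route, for illustration only
def get_task_file(task_id: str):
    local_path = task_file_paths.get(task_id)
    if not local_path:
        raise HTTPException(status_code=404, detail="No file associated with this task_id.")

    # Resolve the stored path and require it to sit inside the allowed cache base,
    # so a malformed dataset entry cannot point the server at an arbitrary file.
    resolved = os.path.abspath(local_path)
    if os.path.commonpath([resolved, ALLOWED_CACHE_BASE]) != ALLOWED_CACHE_BASE:
        raise HTTPException(status_code=403, detail="File path outside the allowed cache directory.")

    if not os.path.isfile(resolved):
        raise HTTPException(status_code=404, detail="File not found on server.")

    return FileResponse(resolved, filename=os.path.basename(resolved))

Comparing os.path.commonpath of the two absolute paths against the base is one way to guard against path traversal; pathlib's Path.is_relative_to (Python 3.9+) would serve the same purpose.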