Update app.py
app.py
CHANGED
@@ -28,55 +28,49 @@ app.add_middleware(
 
 @app.get("/api/results")
 async def get_results():
-    try:
-
-
-
-
-
-
-
-        print("Columns:", df.columns)
+    # Load the dataset
+    dataset = load_dataset("smolagents/results")
+    # Convert to a pandas DataFrame for processing
+    df = dataset["train"].to_pandas()
+
+    # Log some info to help debug
+    print("Dataset loaded, shape:", df.shape)
+    print("Columns:", df.columns)
 
-
-
-
-
-
-
-
-
-        # Group by model_id and agent_action_type to create the expected structure
-        for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
-            # Calculate scores for each benchmark
-            benchmark_scores = {}
-            benchmarks = ['GAIA', 'MATH', 'SimpleQA']
-
-            for benchmark in benchmarks:
-                benchmark_group = group[group['benchmark'] == benchmark]
-                if not benchmark_group.empty:
-                    benchmark_scores[benchmark] = benchmark_group['score'].mean() * 100  # Convert to percentage
-
-            # Calculate average if we have at least one benchmark score
-            if benchmark_scores:
-                benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)
+    # Process the data to match frontend expectations
+    result = []
+    # Ensure we have the expected columns
+    expected_columns = ['model_id', 'agent_action_type', 'source', 'acc']
+    for col in expected_columns:
+        if col not in df.columns:
+            print(f"Warning: Column {col} not found in dataset")
+
+    # Group by model_id and agent_action_type to create the expected structure
+    for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
+        # Calculate scores for each benchmark
+        benchmark_scores = {}
+        benchmarks = ['GAIA', 'MATH', 'SimpleQA']
+
+        for benchmark in benchmarks:
+            benchmark_group = group[group['source'] == benchmark]
+            if not benchmark_group.empty:
+                benchmark_scores[benchmark] = benchmark_group['acc'].mean() * 100  # Convert to percentage
+
+        # Calculate average if we have at least one benchmark score
+        if benchmark_scores:
+            benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)
 
-
-
-
-
-
-
-
-
-
-
-
+        # Add entry to result
+        result.append({
+            'model_id': model_id,
+            'agent_action_type': agent_action_type,
+            'scores': benchmark_scores
+        })
+
+    print(f"Processed {len(result)} entries for the frontend")
+    # Return the properly formatted data as a JSON response
+    return result
 
-    except Exception as e:
-        # Print the full error traceback to your logs
-        print("Error occurred:", str(e))
-        raise HTTPException(status_code=500, detail=str(e))
 
 
 
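A quick way to sanity-check the reworked endpoint is a smoke test against the FastAPI app. The sketch below is not part of the commit: it assumes app.py exposes the FastAPI instance as `app`, that the smolagents/results dataset is reachable from the test environment, and the test name `test_get_results_shape` is hypothetical.

# Minimal smoke test for GET /api/results (a sketch, not part of this commit).
# Assumes app.py exposes the FastAPI instance as `app` and that the
# smolagents/results dataset can be downloaded in the test environment.
from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

def test_get_results_shape():
    response = client.get("/api/results")
    assert response.status_code == 200

    entries = response.json()
    assert isinstance(entries, list)
    for entry in entries:
        # Each entry carries the two grouping keys plus a per-benchmark scores mapping.
        assert set(entry) == {"model_id", "agent_action_type", "scores"}
        for benchmark, score in entry["scores"].items():
            assert benchmark in {"GAIA", "MATH", "SimpleQA", "Average"}
            # Scores are percentages, so they should fall in [0, 100].
            assert 0.0 <= score <= 100.0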