DABstep

Running on CPU Upgrade

martinigoyanes committed on Apr 9

Commit

42f179a

1 Parent(s): 2c9a73e

fix: parsing of validated and unvalidated submissions

Files changed (1) hide show

dabstep_benchmark/leaderboard.py CHANGED Viewed

@@ -234,7 +234,7 @@ def generate_leaderboard_df() -> Tuple[pd.DataFrame, pd.DataFrame]:
     task_scores_df = DATASETS["task_scores"].to_pandas()
     submissions_df = DATASETS["submissions"].to_pandas()
-    # get metadata of each submssion_id
     submissions_df = (
         submissions_df.groupby("submission_id")
         .first()
@@ -297,23 +297,23 @@ def generate_leaderboard_df() -> Tuple[pd.DataFrame, pd.DataFrame]:
     }
     col_order = [new_col_name for new_col_name in col_map.values()]
     leaderboard_df.rename(columns=col_map, inplace=True)
-    df = leaderboard_df[col_order].copy()
     # formatting
     # convert scores to %
-    df["Easy Level Accuracy (%)"] = df["Easy Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
-    df["Hard Level Accuracy (%)"] = df["Hard Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
     # make repo url clickable in markdown
-    df["Repo URL"] = df["Repo URL"].apply(lambda x: f"[Link]({x})" if x != "" else x)
     # make agent name bold
-    df["Agent"] = df["Agent"].apply(lambda x: f"**{x}**")
     # sort-by best score
-    df.sort_values(by="Hard Level Accuracy (%)", ascending=False, inplace=True)
-    validated = leaderboard_df[leaderboard_df["validated"] == True].drop(columns=["validated"])
-    unvalidated = leaderboard_df[leaderboard_df["validated"] == False].drop(columns=["validated"])
-    return validated, unvalidated

     task_scores_df = DATASETS["task_scores"].to_pandas()
     submissions_df = DATASETS["submissions"].to_pandas()
+    # get metadata of each submission_id
     submissions_df = (
         submissions_df.groupby("submission_id")
         .first()
     }
     col_order = [new_col_name for new_col_name in col_map.values()]
     leaderboard_df.rename(columns=col_map, inplace=True)
+    leaderboard_df = leaderboard_df[col_order]
     # formatting
     # convert scores to %
+    leaderboard_df["Easy Level Accuracy (%)"] = leaderboard_df["Easy Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
+    leaderboard_df["Hard Level Accuracy (%)"] = leaderboard_df["Hard Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
     # make repo url clickable in markdown
+    leaderboard_df["Repo URL"] = leaderboard_df["Repo URL"].apply(lambda x: f"[Link]({x})" if x != "" else x)
     # make agent name bold
+    leaderboard_df["Agent"] = leaderboard_df["Agent"].apply(lambda x: f"**{x}**")
     # sort-by best score
+    leaderboard_df.sort_values(by="Hard Level Accuracy (%)", ascending=False, inplace=True)
+    validated_lb = leaderboard_df[leaderboard_df["validated"] == True].drop(columns=["validated"])
+    unvalidated_lb = leaderboard_df[leaderboard_df["validated"] == False].drop(columns=["validated"])
+    return validated_lb, unvalidated_lb