Spaces: Running on CPU Upgrade
Commit · 42f179a
1 Parent(s): 2c9a73e
fix: parsing of validated and unvalidated submissions
Browse files
- dabstep_benchmark/leaderboard.py +10 -10
dabstep_benchmark/leaderboard.py
CHANGED
|
@@ -234,7 +234,7 @@ def generate_leaderboard_df() -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
| 234 |
task_scores_df = DATASETS["task_scores"].to_pandas()
|
| 235 |
submissions_df = DATASETS["submissions"].to_pandas()
|
| 236 |
|
| 237 |
-
# get metadata of each
|
| 238 |
submissions_df = (
|
| 239 |
submissions_df.groupby("submission_id")
|
| 240 |
.first()
|
|
@@ -297,23 +297,23 @@ def generate_leaderboard_df() -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
| 297 |
}
|
| 298 |
col_order = [new_col_name for new_col_name in col_map.values()]
|
| 299 |
leaderboard_df.rename(columns=col_map, inplace=True)
|
| 300 |
-
|
| 301 |
|
| 302 |
# formatting
|
| 303 |
# convert scores to %
|
| 304 |
-
|
| 305 |
-
|
| 306 |
|
| 307 |
# make repo url clickable in markdown
|
| 308 |
-
|
| 309 |
|
| 310 |
# make agent name bold
|
| 311 |
-
|
| 312 |
|
| 313 |
# sort-by best score
|
| 314 |
-
|
| 315 |
|
| 316 |
-
|
| 317 |
-
|
| 318 |
|
| 319 |
-
return
|
|
|
|
| 234 |
task_scores_df = DATASETS["task_scores"].to_pandas()
|
| 235 |
submissions_df = DATASETS["submissions"].to_pandas()
|
| 236 |
|
| 237 |
+
# get metadata of each submission_id
|
| 238 |
submissions_df = (
|
| 239 |
submissions_df.groupby("submission_id")
|
| 240 |
.first()
|
|
|
|
| 297 |
}
|
| 298 |
col_order = [new_col_name for new_col_name in col_map.values()]
|
| 299 |
leaderboard_df.rename(columns=col_map, inplace=True)
|
| 300 |
+
leaderboard_df = leaderboard_df[col_order]
|
| 301 |
|
| 302 |
# formatting
|
| 303 |
# convert scores to %
|
| 304 |
+
leaderboard_df["Easy Level Accuracy (%)"] = leaderboard_df["Easy Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
|
| 305 |
+
leaderboard_df["Hard Level Accuracy (%)"] = leaderboard_df["Hard Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
|
| 306 |
|
| 307 |
# make repo url clickable in markdown
|
| 308 |
+
leaderboard_df["Repo URL"] = leaderboard_df["Repo URL"].apply(lambda x: f"[Link]({x})" if x != "" else x)
|
| 309 |
|
| 310 |
# make agent name bold
|
| 311 |
+
leaderboard_df["Agent"] = leaderboard_df["Agent"].apply(lambda x: f"**{x}**")
|
| 312 |
|
| 313 |
# sort-by best score
|
| 314 |
+
leaderboard_df.sort_values(by="Hard Level Accuracy (%)", ascending=False, inplace=True)
|
| 315 |
|
| 316 |
+
validated_lb = leaderboard_df[leaderboard_df["validated"] == True].drop(columns=["validated"])
|
| 317 |
+
unvalidated_lb = leaderboard_df[leaderboard_df["validated"] == False].drop(columns=["validated"])
|
| 318 |
|
| 319 |
+
return validated_lb, unvalidated_lb
|