Spaces:
Running
Running
add cache leaderboard
Browse files
app.py
CHANGED
|
@@ -362,7 +362,36 @@ def load_content_from_hf(repo_name="SWE-Arena/model_votes"):
|
|
| 362 |
raise Exception("Error loading feedback data from Hugging Face repository.")
|
| 363 |
|
| 364 |
|
| 365 |
-
def get_leaderboard_data(vote_entry=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
# Load feedback data from the Hugging Face repository
|
| 367 |
vote_data = load_content_from_hf()
|
| 368 |
vote_df = pd.DataFrame(vote_data)
|
|
@@ -481,26 +510,26 @@ def get_leaderboard_data(vote_entry=None):
|
|
| 481 |
vote_df["left"], vote_df["right"], vote_df["winner"], tie_weight=0
|
| 482 |
)
|
| 483 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
# Calculate CEI results
|
| 485 |
cei_result = {}
|
| 486 |
for model in elo_result.scores.index:
|
| 487 |
if model in model_stats and model_stats[model]["cei_max"] > 0:
|
| 488 |
-
cei_result[model] = round(
|
| 489 |
-
model_stats[model]["cei_sum"] / model_stats[model]["cei_max"], 2
|
| 490 |
-
)
|
| 491 |
else:
|
| 492 |
-
cei_result[model] =
|
| 493 |
cei_result = pd.Series(cei_result)
|
| 494 |
|
| 495 |
# Calculate MCS results
|
| 496 |
mcs_result = {}
|
| 497 |
for model in elo_result.scores.index:
|
| 498 |
if model in model_stats and model_stats[model]["self_matches"] > 0:
|
| 499 |
-
mcs_result[model] = round(
|
| 500 |
-
model_stats[model]["self_draws"] / model_stats[model]["self_matches"], 2
|
| 501 |
-
)
|
| 502 |
else:
|
| 503 |
-
mcs_result[model] =
|
| 504 |
mcs_result = pd.Series(mcs_result)
|
| 505 |
|
| 506 |
# Combine all results into a single DataFrame
|
|
@@ -539,8 +568,25 @@ def get_leaderboard_data(vote_entry=None):
|
|
| 539 |
leaderboard_data = leaderboard_data[
|
| 540 |
["Rank"] + [col for col in leaderboard_data.columns if col != "Rank"]
|
| 541 |
]
|
| 542 |
-
return leaderboard_data
|
| 543 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
|
| 545 |
# Function to enable or disable submit buttons based on textbox content
|
| 546 |
def toggle_submit_button(text):
|
|
@@ -567,7 +613,7 @@ with gr.Blocks() as app:
|
|
| 567 |
)
|
| 568 |
# Initialize the leaderboard with the DataFrame containing the expected columns
|
| 569 |
leaderboard_component = Leaderboard(
|
| 570 |
-
value=get_leaderboard_data(),
|
| 571 |
select_columns=[
|
| 572 |
"Rank",
|
| 573 |
"Model",
|
|
|
|
| 362 |
raise Exception("Error loading feedback data from Hugging Face repository.")
|
| 363 |
|
| 364 |
|
| 365 |
+
def get_leaderboard_data(vote_entry=None, use_cache=True):
|
| 366 |
+
year = str(datetime.now().year)
|
| 367 |
+
|
| 368 |
+
# Try to load cached leaderboard first
|
| 369 |
+
if use_cache:
|
| 370 |
+
try:
|
| 371 |
+
cached_path = hf_hub_download(
|
| 372 |
+
repo_id="SWE-Arena/model_leaderboards",
|
| 373 |
+
filename=f"{year}.json",
|
| 374 |
+
repo_type="dataset"
|
| 375 |
+
)
|
| 376 |
+
with open(cached_path, "r") as f:
|
| 377 |
+
leaderboard_data = pd.read_json(f)
|
| 378 |
+
# Round the score columns listed below to two decimal places (other columns are left as-is)
|
| 379 |
+
leaderboard_data = leaderboard_data.round(
|
| 380 |
+
{
|
| 381 |
+
"Elo Score": 2,
|
| 382 |
+
"Conversation Efficiency Index": 2,
|
| 383 |
+
"Model Consistency Score": 2,
|
| 384 |
+
"Average Win Rate": 2,
|
| 385 |
+
"Bradley-Terry Coefficient": 2,
|
| 386 |
+
"Eigenvector Centrality Value": 2,
|
| 387 |
+
"Newman Modularity Score": 2,
|
| 388 |
+
"PageRank Score": 2,
|
| 389 |
+
}
|
| 390 |
+
)
|
| 391 |
+
return leaderboard_data
|
| 392 |
+
except Exception as e:
|
| 393 |
+
print(f"No cached leaderboard found for {year}, computing from votes...")
|
| 394 |
+
|
| 395 |
# Load feedback data from the Hugging Face repository
|
| 396 |
vote_data = load_content_from_hf()
|
| 397 |
vote_df = pd.DataFrame(vote_data)
|
|
|
|
| 510 |
vote_df["left"], vote_df["right"], vote_df["winner"], tie_weight=0
|
| 511 |
)
|
| 512 |
|
| 513 |
+
# Replace potential +/-inf values in the results with NaN
|
| 514 |
+
for result in [avr_result, bt_result, newman_result, eigen_result, elo_result, pagerank_result]:
|
| 515 |
+
result.scores = result.scores.replace([float('inf'), float('-inf')], float('nan'))
|
| 516 |
+
|
| 517 |
# Calculate CEI results
|
| 518 |
cei_result = {}
|
| 519 |
for model in elo_result.scores.index:
|
| 520 |
if model in model_stats and model_stats[model]["cei_max"] > 0:
|
| 521 |
+
cei_result[model] = round(model_stats[model]["cei_sum"] / model_stats[model]["cei_max"], 2)
|
|
|
|
|
|
|
| 522 |
else:
|
| 523 |
+
cei_result[model] = None
|
| 524 |
cei_result = pd.Series(cei_result)
|
| 525 |
|
| 526 |
# Calculate MCS results
|
| 527 |
mcs_result = {}
|
| 528 |
for model in elo_result.scores.index:
|
| 529 |
if model in model_stats and model_stats[model]["self_matches"] > 0:
|
| 530 |
+
mcs_result[model] = round(model_stats[model]["self_draws"] / model_stats[model]["self_matches"], 2)
|
|
|
|
|
|
|
| 531 |
else:
|
| 532 |
+
mcs_result[model] = None
|
| 533 |
mcs_result = pd.Series(mcs_result)
|
| 534 |
|
| 535 |
# Combine all results into a single DataFrame
|
|
|
|
| 568 |
leaderboard_data = leaderboard_data[
|
| 569 |
["Rank"] + [col for col in leaderboard_data.columns if col != "Rank"]
|
| 570 |
]
|
|
|
|
| 571 |
|
| 572 |
+
# Save leaderboard data if this is a new vote
|
| 573 |
+
if vote_entry is not None:
|
| 574 |
+
try:
|
| 575 |
+
# Convert DataFrame to JSON and save
|
| 576 |
+
json_content = leaderboard_data.to_json(orient='records', indent=4).encode('utf-8')
|
| 577 |
+
file_like_object = io.BytesIO(json_content)
|
| 578 |
+
|
| 579 |
+
upload_file(
|
| 580 |
+
path_or_fileobj=file_like_object,
|
| 581 |
+
path_in_repo=f"{year}.json",
|
| 582 |
+
repo_id="SWE-Arena/model_leaderboards",
|
| 583 |
+
repo_type="dataset",
|
| 584 |
+
use_auth_token=HfFolder.get_token()
|
| 585 |
+
)
|
| 586 |
+
except Exception as e:
|
| 587 |
+
print(f"Failed to save leaderboard cache: {e}")
|
| 588 |
+
|
| 589 |
+
return leaderboard_data
|
| 590 |
|
| 591 |
# Function to enable or disable submit buttons based on textbox content
|
| 592 |
def toggle_submit_button(text):
|
|
|
|
| 613 |
)
|
| 614 |
# Initialize the leaderboard with the DataFrame containing the expected columns
|
| 615 |
leaderboard_component = Leaderboard(
|
| 616 |
+
value=get_leaderboard_data(use_cache=True),
|
| 617 |
select_columns=[
|
| 618 |
"Rank",
|
| 619 |
"Model",
|