zhiminy committed
Commit 625c7c1 · 1 Parent(s): d089f70

add cache leaderboard
Files changed (1): app.py (+57, −11)
app.py CHANGED
@@ -362,7 +362,36 @@ def load_content_from_hf(repo_name="SWE-Arena/model_votes"):
         raise Exception("Error loading feedback data from Hugging Face repository.")


-def get_leaderboard_data(vote_entry=None):
+def get_leaderboard_data(vote_entry=None, use_cache=True):
+    year = str(datetime.now().year)
+
+    # Try to load cached leaderboard first
+    if use_cache:
+        try:
+            cached_path = hf_hub_download(
+                repo_id="SWE-Arena/model_leaderboards",
+                filename=f"{year}.json",
+                repo_type="dataset"
+            )
+            with open(cached_path, "r") as f:
+                leaderboard_data = pd.read_json(f)
+            # Round all numeric columns to two decimal places
+            leaderboard_data = leaderboard_data.round(
+                {
+                    "Elo Score": 2,
+                    "Conversation Efficiency Index": 2,
+                    "Model Consistency Score": 2,
+                    "Average Win Rate": 2,
+                    "Bradley-Terry Coefficient": 2,
+                    "Eigenvector Centrality Value": 2,
+                    "Newman Modularity Score": 2,
+                    "PageRank Score": 2,
+                }
+            )
+            return leaderboard_data
+        except Exception as e:
+            print(f"No cached leaderboard found for {year}, computing from votes...")
+
     # Load feedback data from the Hugging Face repository
     vote_data = load_content_from_hf()
     vote_df = pd.DataFrame(vote_data)
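For reference, the cache-read path added in this hunk can be exercised on its own. The sketch below is a standalone version under the same assumptions as the diff (huggingface_hub and pandas available, the SWE-Arena/model_leaderboards dataset repo holding one JSON file per year); the helper name load_cached_leaderboard is hypothetical, and the rounding here is uniform rather than per-column for brevity.

    from datetime import datetime
    import pandas as pd
    from huggingface_hub import hf_hub_download

    def load_cached_leaderboard(repo_id="SWE-Arena/model_leaderboards"):
        # One cache file per calendar year, e.g. 2025.json
        year = str(datetime.now().year)
        try:
            cached_path = hf_hub_download(
                repo_id=repo_id,
                filename=f"{year}.json",
                repo_type="dataset",
            )
        except Exception:
            # No cache for this year yet; the caller falls back to recomputing from votes.
            return None
        # The cache is written with orient="records", so read_json restores one row per model.
        leaderboard = pd.read_json(cached_path)
        # Round all numeric columns for display; non-numeric columns are unaffected.
        return leaderboard.round(2)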
@@ -481,26 +510,26 @@ def get_leaderboard_data(vote_entry=None):
         vote_df["left"], vote_df["right"], vote_df["winner"], tie_weight=0
     )

+    # Clean up potential inf/NaN values in the results
+    for result in [avr_result, bt_result, newman_result, eigen_result, elo_result, pagerank_result]:
+        result.scores = result.scores.replace([float('inf'), float('-inf')], float('nan'))
+
     # Calculate CEI results
     cei_result = {}
     for model in elo_result.scores.index:
         if model in model_stats and model_stats[model]["cei_max"] > 0:
-            cei_result[model] = round(
-                model_stats[model]["cei_sum"] / model_stats[model]["cei_max"], 2
-            )
+            cei_result[model] = round(model_stats[model]["cei_sum"] / model_stats[model]["cei_max"], 2)
         else:
-            cei_result[model] = "N/A"
+            cei_result[model] = None
     cei_result = pd.Series(cei_result)

     # Calculate MCS results
     mcs_result = {}
     for model in elo_result.scores.index:
         if model in model_stats and model_stats[model]["self_matches"] > 0:
-            mcs_result[model] = round(
-                model_stats[model]["self_draws"] / model_stats[model]["self_matches"], 2
-            )
+            mcs_result[model] = round(model_stats[model]["self_draws"] / model_stats[model]["self_matches"], 2)
         else:
-            mcs_result[model] = "N/A"
+            mcs_result[model] = None
     mcs_result = pd.Series(mcs_result)

     # Combine all results into a single DataFrame
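Two data-hygiene details in this hunk: ±inf ratings are mapped to NaN before further processing, and models without CEI/MCS data now get None instead of the string "N/A". None is stored as NaN in a pandas float Series, so the score columns stay numeric and rounding, sorting, and JSON serialization keep working. A toy illustration (values are made up):

    import pandas as pd

    # Map +/-inf ratings to NaN, as the hunk does for each result's scores Series.
    scores = pd.Series({"model_a": 12.3, "model_b": float("inf")})
    scores = scores.replace([float("inf"), float("-inf")], float("nan"))
    print(scores.dtype)      # float64, with model_b now NaN

    # None becomes NaN in a float Series, so the column stays numeric;
    # the old "N/A" string would have forced the whole column to object dtype.
    cei = pd.Series({"model_a": 0.4567, "model_b": None})
    print(cei.round(2))      # model_a 0.46, model_b NaN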
@@ -539,8 +568,25 @@ def get_leaderboard_data(vote_entry=None):
     leaderboard_data = leaderboard_data[
         ["Rank"] + [col for col in leaderboard_data.columns if col != "Rank"]
     ]
-    return leaderboard_data

+    # Save leaderboard data if this is a new vote
+    if vote_entry is not None:
+        try:
+            # Convert DataFrame to JSON and save
+            json_content = leaderboard_data.to_json(orient='records', indent=4).encode('utf-8')
+            file_like_object = io.BytesIO(json_content)
+
+            upload_file(
+                path_or_fileobj=file_like_object,
+                path_in_repo=f"{year}.json",
+                repo_id="SWE-Arena/model_leaderboards",
+                repo_type="dataset",
+                use_auth_token=HfFolder.get_token()
+            )
+        except Exception as e:
+            print(f"Failed to save leaderboard cache: {e}")
+
+    return leaderboard_data

 # Function to enable or disable submit buttons based on textbox content
 def toggle_submit_button(text):
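The cache-write path added here mirrors the read path. Below is a minimal standalone sketch under the same assumptions (io, pandas, and huggingface_hub imports already present in app.py); it omits the explicit token argument, since upload_file falls back to the token saved by huggingface-cli login, and the helper name save_leaderboard_cache is hypothetical.

    import io
    from datetime import datetime
    import pandas as pd
    from huggingface_hub import upload_file

    def save_leaderboard_cache(leaderboard_data: pd.DataFrame,
                               repo_id="SWE-Arena/model_leaderboards"):
        # Serialize one record per model, matching what the cache-read path expects.
        year = str(datetime.now().year)
        json_bytes = leaderboard_data.to_json(orient="records", indent=4).encode("utf-8")
        upload_file(
            path_or_fileobj=io.BytesIO(json_bytes),   # in-memory upload, no temp file needed
            path_in_repo=f"{year}.json",
            repo_id=repo_id,
            repo_type="dataset",
        )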
@@ -567,7 +613,7 @@ with gr.Blocks() as app:
     )
     # Initialize the leaderboard with the DataFrame containing the expected columns
     leaderboard_component = Leaderboard(
-        value=get_leaderboard_data(),
+        value=get_leaderboard_data(use_cache=True),
         select_columns=[
             "Rank",
             "Model",