Commit 9616a74 by whitphx (HF Staff)
Parent: b89cf6e

fix leaderboard
leaderboard/src/leaderboard/app.py CHANGED
@@ -100,6 +100,9 @@ def create_leaderboard_ui():
     df = load_data()
     formatted_df = format_dataframe(df)
 
+    # Cache raw data in Gradio state to avoid reloading on every filter change
+    raw_data_state = gr.State(df)
+
     with gr.Blocks(title="Transformers.js Benchmark Leaderboard") as demo:
         gr.Markdown("# πŸ† Transformers.js Benchmark Leaderboard")
         gr.Markdown(
@@ -197,6 +200,7 @@ def create_leaderboard_ui():
             formatted_new_first_timer = format_dataframe(new_first_timer)
 
             return (
+                new_df,  # Update cached raw data
                 formatted_new_first_timer,
                 formatted_new_df,
                 gr.update(choices=get_unique_values(new_df, "task")),
@@ -206,10 +210,9 @@ def create_leaderboard_ui():
                 gr.update(choices=get_unique_values(new_df, "dtype")),
             )
 
-        def apply_filters(formatted_df, model, task, platform, device, mode, dtype):
+        def apply_filters(raw_df, model, task, platform, device, mode, dtype):
             """Apply filters and return filtered DataFrame."""
-            # Need to reload raw data to filter, then format
-            raw_df = load_data()
+            # Use cached raw data instead of reloading
             filtered = filter_data(raw_df, model, task, platform, device, mode, dtype)
             return format_dataframe(filtered)
 
@@ -217,6 +220,7 @@ def create_leaderboard_ui():
         refresh_btn.click(
            fn=update_data,
            outputs=[
+                raw_data_state,
                first_timer_table,
                results_table,
                task_filter,
@@ -227,9 +231,9 @@ def create_leaderboard_ui():
            ],
        )
 
-        # Filter inputs update the table
+        # Filter inputs update the table (using cached raw data)
        filter_inputs = [
-            results_table,
+            raw_data_state,
            model_filter,
            task_filter,
            platform_filter,
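
The caching pattern introduced above is the standard Gradio one: a gr.State component holds a per-session value, handlers read it through inputs, and a handler replaces it by returning a new value in the position that outputs assigns to the state — which is why raw_data_state leads both the return tuple of update_data and the outputs list of refresh_btn.click. A minimal self-contained sketch of that wiring, with illustrative component names rather than the leaderboard's actual ones:

import gradio as gr
import pandas as pd

def load_data() -> pd.DataFrame:
    # Stand-in for the leaderboard's real loader.
    return pd.DataFrame({"task": ["ner", "asr"], "score": [0.9, 0.8]})

with gr.Blocks() as demo:
    df = load_data()
    raw_state = gr.State(df)  # cached raw DataFrame, one copy per session
    table = gr.Dataframe(value=df)
    task = gr.Dropdown(choices=sorted(df["task"].unique()), label="task")
    refresh = gr.Button("Refresh")

    def apply_filter(raw_df, task_value):
        # Reads the cached DataFrame from state; no reload from the Hub.
        return raw_df[raw_df["task"] == task_value] if task_value else raw_df

    def refresh_data():
        new_df = load_data()
        # First return value updates the state, matching the outputs order.
        return new_df, new_df

    task.change(apply_filter, inputs=[raw_state, task], outputs=table)
    refresh.click(refresh_data, outputs=[raw_state, table])

demo.launch()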
leaderboard/src/leaderboard/data_loader.py CHANGED
@@ -4,10 +4,11 @@ Data loader module for loading benchmark results from HuggingFace Dataset.
 
 import json
 import logging
+from pathlib import Path
 from typing import List, Dict, Any, Optional
 from datetime import datetime
 import pandas as pd
-from huggingface_hub import HfApi, hf_hub_download, list_models
+from huggingface_hub import snapshot_download, list_models
 
 logger = logging.getLogger(__name__)
 
@@ -29,30 +30,32 @@ def load_benchmark_data(
         return pd.DataFrame()
 
     try:
-        api = HfApi(token=token)
-
-        # List all files in the dataset repo
-        files = api.list_repo_files(
+        # Download the entire repository snapshot
+        logger.info(f"Downloading dataset snapshot from {dataset_repo}...")
+        local_dir = snapshot_download(
             repo_id=dataset_repo,
             repo_type="dataset",
             token=token,
         )
+        logger.info(f"Dataset downloaded to {local_dir}")
 
-        # Filter for .json files
-        json_files = [f for f in files if f.endswith(".json")]
+        # Find all JSON files in the downloaded directory
+        local_path = Path(local_dir)
+        json_files = list(local_path.rglob("*.json"))
 
         if not json_files:
+            logger.warning("No JSON files found in dataset")
             return pd.DataFrame()
 
+        logger.info(f"Found {len(json_files)} JSON files")
+
         # Load all benchmark results
         all_results = []
         for file_path in json_files:
             try:
-                result = load_single_benchmark_file(
-                    dataset_repo=dataset_repo,
-                    file_path=file_path,
-                    token=token,
-                )
+                with open(file_path, "r") as f:
+                    result = json.load(f)
+
                 if result:
                     flattened = flatten_result(result)
                     all_results.append(flattened)
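
The switch from per-file hf_hub_download calls to a single snapshot_download trades N HTTP round-trips for one snapshot fetch, and files already in the local cache are not re-downloaded on later calls. A minimal sketch of the same pattern, using a hypothetical repo id; allow_patterns is an optional extra (not in the commit) that limits the snapshot to JSON files:

import json
from pathlib import Path

from huggingface_hub import snapshot_download

# Hypothetical repo id for illustration; the real one comes from config.
DATASET_REPO = "user/benchmark-results"

def load_all_json(token: str | None = None) -> list[dict]:
    # One network operation for the whole repo.
    local_dir = snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=token,
        allow_patterns=["*.json"],  # skip non-JSON payloads entirely
    )
    results = []
    # rglob walks subdirectories too, matching the committed code.
    for path in Path(local_dir).rglob("*.json"):
        try:
            with open(path, "r") as f:
                results.append(json.load(f))
        except (json.JSONDecodeError, OSError):
            continue  # tolerate a malformed or unreadable file
    return results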
@@ -63,6 +66,8 @@ def load_benchmark_data(
         if not all_results:
             return pd.DataFrame()
 
+        logger.info(f"Loaded {len(all_results)} benchmark results")
+
         # Convert to DataFrame
         df = pd.DataFrame(all_results)
 
@@ -80,39 +85,6 @@ def load_benchmark_data(
         return pd.DataFrame()
 
 
-def load_single_benchmark_file(
-    dataset_repo: str,
-    file_path: str,
-    token: Optional[str] = None,
-) -> Optional[Dict[str, Any]]:
-    """Load a single benchmark result file from HuggingFace Dataset.
-
-    Args:
-        dataset_repo: HuggingFace dataset repository ID
-        file_path: Path to the JSON file within the dataset
-        token: HuggingFace API token (optional)
-
-    Returns:
-        Dictionary containing the benchmark result, or None if failed
-    """
-    try:
-        # Download the file
-        local_path = hf_hub_download(
-            repo_id=dataset_repo,
-            filename=file_path,
-            repo_type="dataset",
-            token=token,
-        )
-
-        # Read JSON file (single object per file)
-        with open(local_path, "r") as f:
-            return json.load(f)
-
-    except Exception as e:
-        logger.error(f"Error loading file {file_path}: {e}")
-        return None
-
-
 def flatten_result(result: Dict[str, Any]) -> Dict[str, Any]:
     """Flatten nested benchmark result for display.
 
@@ -305,7 +277,13 @@ def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) ->
         )
 
         # Group by model and take best score for each model within this task
-        best_per_model = task_df.loc[task_df.groupby("modelId")["first_timer_score"].idxmax()]
+        # Filter out NaN scores before getting idxmax
+        idx_max_series = task_df.groupby("modelId")["first_timer_score"].idxmax()
+        # Drop NaN indices
+        valid_indices = idx_max_series.dropna()
+        if valid_indices.empty:
+            continue
+        best_per_model = task_df.loc[valid_indices]
 
         # Sort by first-timer score and take top N for this task
         top_for_task = best_per_model.sort_values("first_timer_score", ascending=False).head(limit_per_task)
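
The last hunk guards idxmax() against groups whose first_timer_score is entirely NaN: depending on the pandas version, such groups either raise or yield NaN labels that break the subsequent .loc lookup. An equivalent, version-independent sketch is to drop NaN scores before grouping — this is an alternative to the committed guard, not the commit's code, and the column and helper names are illustrative:

import numpy as np
import pandas as pd

def best_row_per_model(task_df: pd.DataFrame) -> pd.DataFrame:
    """Best-scoring row per model, ignoring rows without a score."""
    # Dropping NaN scores up front means every remaining group has at
    # least one valid value, so idxmax() is always well defined.
    scored = task_df.dropna(subset=["first_timer_score"])
    if scored.empty:
        return scored
    idx = scored.groupby("modelId")["first_timer_score"].idxmax()
    return scored.loc[idx]

# Tiny check: model "b" has only NaN scores and simply disappears.
df = pd.DataFrame(
    {
        "modelId": ["a", "a", "b"],
        "first_timer_score": [0.2, 0.9, np.nan],
    }
)
print(best_row_per_model(df))  # one row: model "a" with score 0.9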
 