whitphx HF Staff committed on
Commit
9a01936
·
1 Parent(s): 8ec48ea

Display first-timer-friendly models

Browse files
leaderboard/src/leaderboard/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
  """Transformers.js Benchmark Leaderboard"""
2
 
3
  from .app import create_leaderboard_ui
4
- from .data_loader import load_benchmark_data, get_unique_values, flatten_result
5
  from .formatters import apply_formatting
6
 
7
  __version__ = "0.1.0"
@@ -10,5 +10,6 @@ __all__ = [
10
  "load_benchmark_data",
11
  "get_unique_values",
12
  "flatten_result",
 
13
  "apply_formatting",
14
  ]
 
1
  """Transformers.js Benchmark Leaderboard"""
2
 
3
  from .app import create_leaderboard_ui
4
+ from .data_loader import load_benchmark_data, get_unique_values, flatten_result, get_first_timer_friendly_models
5
  from .formatters import apply_formatting
6
 
7
  __version__ = "0.1.0"
 
10
  "load_benchmark_data",
11
  "get_unique_values",
12
  "flatten_result",
13
+ "get_first_timer_friendly_models",
14
  "apply_formatting",
15
  ]
leaderboard/src/leaderboard/app.py CHANGED
@@ -13,6 +13,7 @@ from dotenv import load_dotenv
13
  from leaderboard.data_loader import (
14
  load_benchmark_data,
15
  get_unique_values,
 
16
  )
17
  from leaderboard.formatters import apply_formatting
18
 
@@ -37,13 +38,17 @@ def load_data() -> pd.DataFrame:
37
  token=HF_TOKEN,
38
  )
39
 
40
- # Apply formatting to each row
41
- if not df.empty:
42
- df = df.apply(lambda row: pd.Series(apply_formatting(row.to_dict())), axis=1)
43
-
44
  return df
45
 
46
 
 
 
 
 
 
 
 
 
47
  def filter_data(
48
  df: pd.DataFrame,
49
  model_filter: str,
@@ -93,6 +98,7 @@ def create_leaderboard_ui():
93
 
94
  # Load initial data
95
  df = load_data()
 
96
 
97
  with gr.Blocks(title="Transformers.js Benchmark Leaderboard") as demo:
98
  gr.Markdown("# 🏆 Transformers.js Benchmark Leaderboard")
@@ -106,6 +112,24 @@ def create_leaderboard_ui():
106
  "Please set the environment variable to load benchmark data."
107
  )
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  with gr.Row():
110
  refresh_btn = gr.Button("🔄 Refresh Data", variant="primary")
111
 
@@ -145,8 +169,8 @@ def create_leaderboard_ui():
145
  )
146
 
147
  results_table = gr.DataFrame(
148
- value=df,
149
- label="Benchmark Results",
150
  interactive=False,
151
  wrap=True,
152
  )
@@ -166,8 +190,15 @@ def create_leaderboard_ui():
166
  def update_data():
167
  """Reload data from HuggingFace."""
168
  new_df = load_data()
 
 
 
 
 
 
169
  return (
170
- new_df,
 
171
  gr.update(choices=get_unique_values(new_df, "task")),
172
  gr.update(choices=get_unique_values(new_df, "platform")),
173
  gr.update(choices=get_unique_values(new_df, "device")),
@@ -175,14 +206,18 @@ def create_leaderboard_ui():
175
  gr.update(choices=get_unique_values(new_df, "dtype")),
176
  )
177
 
178
- def apply_filters(df, model, task, platform, device, mode, dtype):
179
  """Apply filters and return filtered DataFrame."""
180
- return filter_data(df, model, task, platform, device, mode, dtype)
 
 
 
181
 
182
  # Refresh button updates data and resets filters
183
  refresh_btn.click(
184
  fn=update_data,
185
  outputs=[
 
186
  results_table,
187
  task_filter,
188
  platform_filter,
 
13
  from leaderboard.data_loader import (
14
  load_benchmark_data,
15
  get_unique_values,
16
+ get_first_timer_friendly_models,
17
  )
18
  from leaderboard.formatters import apply_formatting
19
 
 
38
  token=HF_TOKEN,
39
  )
40
 
 
 
 
 
41
  return df
42
 
43
 
44
+ def format_dataframe(df: pd.DataFrame) -> pd.DataFrame:
45
+ """Apply formatting to dataframe for display."""
46
+ if df.empty:
47
+ return df
48
+
49
+ return df.apply(lambda row: pd.Series(apply_formatting(row.to_dict())), axis=1)
50
+
51
+
52
  def filter_data(
53
  df: pd.DataFrame,
54
  model_filter: str,
 
98
 
99
  # Load initial data
100
  df = load_data()
101
+ formatted_df = format_dataframe(df)
102
 
103
  with gr.Blocks(title="Transformers.js Benchmark Leaderboard") as demo:
104
  gr.Markdown("# 🏆 Transformers.js Benchmark Leaderboard")
 
112
  "Please set the environment variable to load benchmark data."
113
  )
114
 
115
+ # First-timer-friendly models section
116
+ with gr.Accordion("✨ First-Timer-Friendly Models", open=True):
117
+ gr.Markdown(
118
+ "These models are great for first-timers! They're popular, fast to load, "
119
+ "and quick to run. Perfect for getting started with Transformers.js.\n\n"
120
+ "**Showing top 3 models per task type.**"
121
+ )
122
+
123
+ first_timer_models = get_first_timer_friendly_models(df, limit_per_task=3)
124
+ formatted_first_timer = format_dataframe(first_timer_models)
125
+
126
+ first_timer_table = gr.DataFrame(
127
+ value=formatted_first_timer,
128
+ label="Top First-Timer-Friendly Models (by Task)",
129
+ interactive=False,
130
+ wrap=True,
131
+ )
132
+
133
  with gr.Row():
134
  refresh_btn = gr.Button("🔄 Refresh Data", variant="primary")
135
 
 
169
  )
170
 
171
  results_table = gr.DataFrame(
172
+ value=formatted_df,
173
+ label="All Benchmark Results",
174
  interactive=False,
175
  wrap=True,
176
  )
 
190
  def update_data():
191
  """Reload data from HuggingFace."""
192
  new_df = load_data()
193
+ formatted_new_df = format_dataframe(new_df)
194
+
195
+ # Update first-timer-friendly models (3 per task)
196
+ new_first_timer = get_first_timer_friendly_models(new_df, limit_per_task=3)
197
+ formatted_new_first_timer = format_dataframe(new_first_timer)
198
+
199
  return (
200
+ formatted_new_first_timer,
201
+ formatted_new_df,
202
  gr.update(choices=get_unique_values(new_df, "task")),
203
  gr.update(choices=get_unique_values(new_df, "platform")),
204
  gr.update(choices=get_unique_values(new_df, "device")),
 
206
  gr.update(choices=get_unique_values(new_df, "dtype")),
207
  )
208
 
209
+ def apply_filters(formatted_df, model, task, platform, device, mode, dtype):
210
  """Apply filters and return filtered DataFrame."""
211
+ # Need to reload raw data to filter, then format
212
+ raw_df = load_data()
213
+ filtered = filter_data(raw_df, model, task, platform, device, mode, dtype)
214
+ return format_dataframe(filtered)
215
 
216
  # Refresh button updates data and resets filters
217
  refresh_btn.click(
218
  fn=update_data,
219
  outputs=[
220
+ first_timer_table,
221
  results_table,
222
  task_filter,
223
  platform_filter,
leaderboard/src/leaderboard/data_loader.py CHANGED
@@ -226,6 +226,109 @@ def enrich_with_hf_metadata(df: pd.DataFrame) -> pd.DataFrame:
226
  return df
227
 
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  def get_unique_values(df: pd.DataFrame, column: str) -> List[str]:
230
  """Get unique values from a column for dropdown choices.
231
 
 
226
  return df
227
 
228
 
229
+ def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -> pd.DataFrame:
230
+ """Identify first-timer-friendly models based on popularity and performance, grouped by task.
231
+
232
+ A model is considered first-timer-friendly if it:
233
+ - Has high downloads (popular)
234
+ - Has fast load times (easy to start)
235
+ - Has fast inference times (quick results)
236
+ - Successfully completed benchmarks
237
+
238
+ Args:
239
+ df: DataFrame containing benchmark results
240
+ limit_per_task: Maximum number of models to return per task
241
+
242
+ Returns:
243
+ DataFrame with top first-timer-friendly models per task
244
+ """
245
+ if df.empty:
246
+ return pd.DataFrame()
247
+
248
+ # Filter only successful benchmarks
249
+ filtered = df[df["status"] == "completed"].copy() if "status" in df.columns else df.copy()
250
+
251
+ if filtered.empty:
252
+ return pd.DataFrame()
253
+
254
+ # Check if task column exists
255
+ if "task" not in filtered.columns:
256
+ logger.warning("Task column not found in dataframe")
257
+ return pd.DataFrame()
258
+
259
+ # Calculate first-timer-friendliness score per task
260
+ all_results = []
261
+
262
+ for task in filtered["task"].unique():
263
+ task_df = filtered[filtered["task"] == task].copy()
264
+
265
+ if task_df.empty:
266
+ continue
267
+
268
+ # Normalize metrics within this task (lower is better for times, higher is better for popularity)
269
+
270
+ # Downloads score (0-1, higher is better)
271
+ if "downloads" in task_df.columns:
272
+ max_downloads = task_df["downloads"].max()
273
+ task_df["downloads_score"] = task_df["downloads"] / max_downloads if max_downloads > 0 else 0
274
+ else:
275
+ task_df["downloads_score"] = 0
276
+
277
+ # Likes score (0-1, higher is better)
278
+ if "likes" in task_df.columns:
279
+ max_likes = task_df["likes"].max()
280
+ task_df["likes_score"] = task_df["likes"] / max_likes if max_likes > 0 else 0
281
+ else:
282
+ task_df["likes_score"] = 0
283
+
284
+ # Load time score (0-1, lower time is better)
285
+ if "load_ms_p50" in task_df.columns:
286
+ max_load = task_df["load_ms_p50"].max()
287
+ task_df["load_score"] = 1 - (task_df["load_ms_p50"] / max_load) if max_load > 0 else 0
288
+ else:
289
+ task_df["load_score"] = 0
290
+
291
+ # Inference time score (0-1, lower time is better)
292
+ if "first_infer_ms_p50" in task_df.columns:
293
+ max_infer = task_df["first_infer_ms_p50"].max()
294
+ task_df["infer_score"] = 1 - (task_df["first_infer_ms_p50"] / max_infer) if max_infer > 0 else 0
295
+ else:
296
+ task_df["infer_score"] = 0
297
+
298
+ # Calculate weighted first-timer-friendliness score
299
+ # Weights: popularity (40% = downloads 25% + likes 15%), load time (30%), inference time (30%)
300
+ task_df["first_timer_score"] = (
301
+ (task_df["downloads_score"] * 0.25) +
302
+ (task_df["likes_score"] * 0.15) +
303
+ (task_df["load_score"] * 0.30) +
304
+ (task_df["infer_score"] * 0.30)
305
+ )
306
+
307
+ # Group by model and take best score for each model within this task
308
+ best_per_model = task_df.loc[task_df.groupby("modelId")["first_timer_score"].idxmax()]
309
+
310
+ # Sort by first-timer score and take top N for this task
311
+ top_for_task = best_per_model.sort_values("first_timer_score", ascending=False).head(limit_per_task)
312
+
313
+ # Drop intermediate scoring columns
314
+ score_cols = ["downloads_score", "likes_score", "load_score", "infer_score", "first_timer_score"]
315
+ top_for_task = top_for_task.drop(columns=[col for col in score_cols if col in top_for_task.columns])
316
+
317
+ all_results.append(top_for_task)
318
+
319
+ if not all_results:
320
+ return pd.DataFrame()
321
+
322
+ # Combine all results
323
+ result = pd.concat(all_results, ignore_index=True)
324
+
325
+ # Sort by task name for better organization
326
+ if "task" in result.columns:
327
+ result = result.sort_values("task")
328
+
329
+ return result
330
+
331
+
332
  def get_unique_values(df: pd.DataFrame, column: str) -> List[str]:
333
  """Get unique values from a column for dropdown choices.
334