whitphx (HF Staff) committed
Commit 3f4c012 · 1 Parent(s): a2ce515

Add text output on leaderboard

leaderboard/src/leaderboard/app.py CHANGED
@@ -13,6 +13,8 @@ from dotenv import load_dotenv
 from leaderboard.data_loader import (
     load_benchmark_data,
     get_unique_values,
+    get_webgpu_beginner_friendly_models,
+    format_recommended_models_as_markdown,
 )
 from leaderboard.formatters import apply_formatting
 
@@ -120,10 +122,48 @@ def create_leaderboard_ui():
         )
 
         gr.Markdown(
-            "💡 **Tip:** Sort by the **first_timer_score** column to find models that are "
-            "popular, fast to load, and quick to run - perfect for getting started!"
+            "💡 **Tip:** Use the recommended models section below to find popular models "
+            "that are fast to load and quick to run - perfect for getting started!"
         )
 
+        # Recommended models section
+        gr.Markdown("## ⭐ Recommended WebGPU Models for Beginners")
+        gr.Markdown(
+            "These models are selected for being:\n"
+            "- **WebGPU compatible** - Work in modern browsers with GPU acceleration\n"
+            "- **Beginner-friendly** - Popular, fast to load, and quick to run\n"
+            "- Sorted by task type, showing top 3-5 models per task"
+        )
+
+        # Get recommended models
+        recommended_models = get_webgpu_beginner_friendly_models(df, limit_per_task=5)
+        formatted_recommended = format_dataframe(recommended_models)
+        markdown_output = format_recommended_models_as_markdown(recommended_models)
+
+        recommended_table = gr.DataFrame(
+            value=formatted_recommended,
+            label="Top WebGPU-Compatible Models by Task",
+            interactive=False,
+            wrap=True,
+        )
+
+        gr.Markdown("### 📝 Markdown Output for llms.txt")
+        gr.Markdown(
+            "Copy the markdown below to embed in your llms.txt or documentation:"
+        )
+
+        markdown_textbox = gr.Textbox(
+            value=markdown_output,
+            label="Markdown for llms.txt",
+            lines=20,
+            max_lines=30,
+            show_copy_button=True,
+            interactive=False,
+        )
+
+        gr.Markdown("---")
+        gr.Markdown("## 🔍 Full Benchmark Results")
+
         with gr.Row():
             refresh_btn = gr.Button("🔄 Refresh Data", variant="primary")
 
@@ -184,10 +224,9 @@ def create_leaderboard_ui():
             "**HuggingFace Metrics:**\n"
             "- **downloads**: Total downloads from HuggingFace Hub\n"
            "- **likes**: Number of likes on HuggingFace Hub\n\n"
-            "**First-Timer Score:**\n"
-            "- **first_timer_score**: 0-100 score combining popularity (40%), load time (30%), and inference time (30%)\n"
-            "- Higher score = better for first-timers (normalized per task)\n"
-            "- ⭐⭐⭐ Excellent (80+), ⭐⭐ Good (60+), ⭐ Fair (40+)"
+            "**WebGPU Compatibility:**\n"
+            "- Models in the recommended section are all WebGPU compatible\n"
+            "- WebGPU enables GPU acceleration in modern browsers"
         )
 
         def update_data():
@@ -195,8 +234,15 @@ def create_leaderboard_ui():
             new_df = load_data()
             formatted_new_df = format_dataframe(new_df)
 
+            # Update recommended models
+            new_recommended = get_webgpu_beginner_friendly_models(new_df, limit_per_task=5)
+            formatted_new_recommended = format_dataframe(new_recommended)
+            new_markdown = format_recommended_models_as_markdown(new_recommended)
+
             return (
                 new_df,  # Update cached raw data
+                formatted_new_recommended,  # Update recommended models
+                new_markdown,  # Update markdown output
                 formatted_new_df,
                 gr.update(choices=get_unique_values(new_df, "task")),
                 gr.update(choices=get_unique_values(new_df, "platform")),
@@ -217,6 +263,8 @@ def create_leaderboard_ui():
             fn=update_data,
             outputs=[
                 raw_data_state,
+                recommended_table,
+                markdown_textbox,
                 results_table,
                 task_filter,
                 platform_filter,
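The app.py change relies on Gradio's positional wiring: the tuple returned by update_data must line up one-to-one with the components listed in outputs, which is why recommended_table and markdown_textbox are inserted at matching positions in both places. A minimal, self-contained sketch of that pattern (illustrative names and synthetic data, not the leaderboard's actual code):

# Minimal sketch of the refresh wiring pattern (illustrative names and synthetic
# data; not the leaderboard's actual code). The callback's return tuple must
# match the `outputs` list positionally.
import gradio as gr
import pandas as pd


def load_data() -> pd.DataFrame:
    # Stand-in for the real loader: a tiny synthetic benchmark table.
    return pd.DataFrame(
        {"modelId": ["example-org/model-a", "example-org/model-b"],
         "first_timer_score": [92.0, 78.5]}
    )


def refresh():
    df = load_data()
    # Stand-in for format_recommended_models_as_markdown().
    markdown = "\n".join(
        f"- {row.modelId}: score {row.first_timer_score:.1f}" for row in df.itertuples()
    )
    # One return value per component in `outputs` below, in the same order.
    return df, markdown


with gr.Blocks() as demo:
    recommended_table = gr.DataFrame(label="Recommended models", interactive=False)
    markdown_textbox = gr.Textbox(label="Markdown for llms.txt", lines=10, show_copy_button=True)
    refresh_btn = gr.Button("Refresh Data")
    refresh_btn.click(fn=refresh, outputs=[recommended_table, markdown_textbox])

if __name__ == "__main__":
    demo.launch()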
leaderboard/src/leaderboard/data_loader.py CHANGED
@@ -413,6 +413,191 @@ def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -
     return result
 
 
+def get_webgpu_beginner_friendly_models(
+    df: pd.DataFrame,
+    limit_per_task: int = 5
+) -> pd.DataFrame:
+    """Get top beginner-friendly models that are WebGPU compatible, grouped by task.
+
+    A model is included if it:
+    - Has a high first_timer_score (popular, fast to load, fast inference)
+    - Has successful WebGPU benchmark results (device=webgpu, status=completed)
+
+    Args:
+        df: DataFrame containing benchmark results
+        limit_per_task: Maximum number of models to return per task (default: 5)
+
+    Returns:
+        DataFrame with top WebGPU-compatible beginner-friendly models per task
+    """
+    if df.empty:
+        return pd.DataFrame()
+
+    # Check if required columns exist before building the filter
+    if "device" not in df.columns or "status" not in df.columns:
+        logger.warning("Required columns (device, status) not found in dataframe")
+        return pd.DataFrame()
+
+    # Filter for WebGPU benchmarks that completed successfully
+    webgpu_filter = (
+        (df["device"] == "webgpu") &
+        (df["status"] == "completed")
+    )
+
+    filtered = df[webgpu_filter].copy()
+
+    if filtered.empty:
+        logger.warning("No successful WebGPU benchmarks found")
+        return pd.DataFrame()
+
+    # Check if required columns exist
+    if "task" not in filtered.columns or "first_timer_score" not in filtered.columns:
+        logger.warning("Required columns (task, first_timer_score) not found in filtered dataframe")
+        return pd.DataFrame()
+
+    # Group by task and get top models
+    all_results = []
+
+    for task in filtered["task"].unique():
+        task_df = filtered[filtered["task"] == task].copy()
+
+        if task_df.empty:
+            continue
+
+        # Remove rows with NaN first_timer_score
+        task_df = task_df.dropna(subset=["first_timer_score"])
+
+        if task_df.empty:
+            continue
+
+        # For each model, get the benchmark with the highest first_timer_score
+        idx_max_series = task_df.groupby("modelId")["first_timer_score"].idxmax()
+        valid_indices = idx_max_series.dropna()
+
+        if valid_indices.empty:
+            continue
+
+        best_per_model = task_df.loc[valid_indices]
+
+        # Sort by first_timer_score (descending) and take top N
+        top_for_task = best_per_model.sort_values(
+            "first_timer_score",
+            ascending=False
+        ).head(limit_per_task)
+
+        all_results.append(top_for_task)
+
+    if not all_results:
+        logger.warning("No models found after filtering and grouping")
+        return pd.DataFrame()
+
+    # Combine all results
+    result = pd.concat(all_results, ignore_index=True)
+
+    # Sort by task, then by first_timer_score (descending)
+    if "task" in result.columns and "first_timer_score" in result.columns:
+        result = result.sort_values(
+            ["task", "first_timer_score"],
+            ascending=[True, False]
+        )
+
+    return result
+
+
+def format_recommended_models_as_markdown(df: pd.DataFrame) -> str:
+    """Format recommended WebGPU models as markdown for llms.txt embedding.
+
+    Args:
+        df: DataFrame containing recommended models (output from get_webgpu_beginner_friendly_models)
+
+    Returns:
+        Formatted markdown string
+    """
+    if df.empty:
+        return "No recommended models available."
+
+    markdown_lines = [
+        "# Recommended Transformers.js Models (WebGPU Compatible)",
+        "",
+        "These models are optimized for beginners - popular, fast to load, and WebGPU compatible.",
+        "",
+    ]
+
+    # Group by task
+    if "task" not in df.columns:
+        return "No task information available."
+
+    for task in sorted(df["task"].unique()):
+        task_df = df[df["task"] == task].copy()
+
+        if task_df.empty:
+            continue
+
+        # Add task header
+        markdown_lines.append(f"## {task.title()}")
+        markdown_lines.append("")
+
+        # Sort by first_timer_score descending
+        if "first_timer_score" in task_df.columns:
+            task_df = task_df.sort_values("first_timer_score", ascending=False)
+
+        # Add each model
+        for idx, row in task_df.iterrows():
+            model_id = row.get("modelId", "Unknown")
+            score = row.get("first_timer_score", None)
+            downloads = row.get("downloads", 0)
+            likes = row.get("likes", 0)
+            load_time = row.get("load_ms_p50", None)
+            infer_time = row.get("first_infer_ms_p50", None)
+
+            # Model entry
+            markdown_lines.append(f"### {model_id}")
+            markdown_lines.append("")
+
+            # WebGPU compatibility
+            markdown_lines.append("**WebGPU Compatible:** ✅ Yes")
+            markdown_lines.append("")
+
+            # Metrics
+            metrics = []
+            if load_time is not None:
+                metrics.append(f"Load: {load_time:.1f}ms")
+            if infer_time is not None:
+                metrics.append(f"Inference: {infer_time:.1f}ms")
+            if downloads:
+                if downloads >= 1_000_000:
+                    downloads_str = f"{downloads / 1_000_000:.1f}M"
+                elif downloads >= 1_000:
+                    downloads_str = f"{downloads / 1_000:.1f}k"
+                else:
+                    downloads_str = str(downloads)
+                metrics.append(f"Downloads: {downloads_str}")
+            if likes:
+                metrics.append(f"Likes: {likes}")
+
+            if metrics:
+                markdown_lines.append(f"**Metrics:** {' | '.join(metrics)}")
+
+            markdown_lines.append("")
+
+        markdown_lines.append("---")
+        markdown_lines.append("")
+
+    # Add footer
+    markdown_lines.extend([
+        "## About These Recommendations",
+        "",
+        "Models are selected based on:",
+        "- **Popularity**: Downloads and likes from HuggingFace Hub",
+        "- **Performance**: Fast loading and inference times",
+        "- **Compatibility**: All models have successful WebGPU benchmark results",
+        "",
+        "These models are recommended for beginners getting started with Transformers.js.",
+    ])
+
+    return "\n".join(markdown_lines)
+
+
 def get_unique_values(df: pd.DataFrame, column: str) -> List[str]:
     """Get unique values from a column for dropdown choices.
 
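For reference, a sketch of how the two new helpers might be exercised outside the app, using a tiny synthetic DataFrame with the columns the functions expect; the import assumes the leaderboard package from this repo is installed locally, and every value below is made up for illustration:

# Hypothetical usage of the new helpers on synthetic data. The import path
# assumes the leaderboard package from this repo is installed (e.g. `pip install -e .`);
# all values below are invented for illustration.
import pandas as pd

from leaderboard.data_loader import (
    get_webgpu_beginner_friendly_models,
    format_recommended_models_as_markdown,
)

df = pd.DataFrame(
    {
        "modelId": ["example-org/tiny-model", "example-org/large-model"],
        "task": ["feature-extraction", "feature-extraction"],
        "device": ["webgpu", "wasm"],  # only device == "webgpu" rows are eligible
        "status": ["completed", "completed"],
        "first_timer_score": [88.0, 42.0],
        "downloads": [2_500_000, 10_000],
        "likes": [350, 5],
        "load_ms_p50": [820.0, 15000.0],
        "first_infer_ms_p50": [35.0, 900.0],
    }
)

# Top WebGPU-compatible models per task (here, just the single webgpu row).
recommended = get_webgpu_beginner_friendly_models(df, limit_per_task=5)
print(recommended[["task", "modelId", "first_timer_score"]])

# Markdown block ready to paste into llms.txt or documentation.
print(format_recommended_models_as_markdown(recommended))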