Add text output on leaderboard
leaderboard/src/leaderboard/app.py
CHANGED
@@ -13,6 +13,8 @@ from dotenv import load_dotenv
 from leaderboard.data_loader import (
     load_benchmark_data,
     get_unique_values,
+    get_webgpu_beginner_friendly_models,
+    format_recommended_models_as_markdown,
 )
 from leaderboard.formatters import apply_formatting
 
@@ -120,10 +122,48 @@ def create_leaderboard_ui():
         )
 
         gr.Markdown(
-            "💡 **Tip:**
-            "
+            "💡 **Tip:** Use the recommended models section below to find popular models "
+            "that are fast to load and quick to run - perfect for getting started!"
         )
 
+        # Recommended models section
+        gr.Markdown("## ⭐ Recommended WebGPU Models for Beginners")
+        gr.Markdown(
+            "These models are selected for being:\n"
+            "- **WebGPU compatible** - Work in modern browsers with GPU acceleration\n"
+            "- **Beginner-friendly** - Popular, fast to load, and quick to run\n"
+            "- Sorted by task type, showing top 3-5 models per task"
+        )
+
+        # Get recommended models
+        recommended_models = get_webgpu_beginner_friendly_models(df, limit_per_task=5)
+        formatted_recommended = format_dataframe(recommended_models)
+        markdown_output = format_recommended_models_as_markdown(recommended_models)
+
+        recommended_table = gr.DataFrame(
+            value=formatted_recommended,
+            label="Top WebGPU-Compatible Models by Task",
+            interactive=False,
+            wrap=True,
+        )
+
+        gr.Markdown("### 📋 Markdown Output for llms.txt")
+        gr.Markdown(
+            "Copy the markdown below to embed in your llms.txt or documentation:"
+        )
+
+        markdown_textbox = gr.Textbox(
+            value=markdown_output,
+            label="Markdown for llms.txt",
+            lines=20,
+            max_lines=30,
+            show_copy_button=True,
+            interactive=False,
+        )
+
+        gr.Markdown("---")
+        gr.Markdown("## 📊 Full Benchmark Results")
+
         with gr.Row():
             refresh_btn = gr.Button("🔄 Refresh Data", variant="primary")
 
@@ -184,10 +224,9 @@ def create_leaderboard_ui():
             "**HuggingFace Metrics:**\n"
             "- **downloads**: Total downloads from HuggingFace Hub\n"
             "- **likes**: Number of likes on HuggingFace Hub\n\n"
-            "**
-            "-
-            "-
-            "- ⭐⭐⭐ Excellent (80+), ⭐⭐ Good (60+), ⭐ Fair (40+)"
+            "**WebGPU Compatibility:**\n"
+            "- Models in the recommended section are all WebGPU compatible\n"
+            "- WebGPU enables GPU acceleration in modern browsers"
         )
 
         def update_data():
@@ -195,8 +234,15 @@ def create_leaderboard_ui():
             new_df = load_data()
             formatted_new_df = format_dataframe(new_df)
 
+            # Update recommended models
+            new_recommended = get_webgpu_beginner_friendly_models(new_df, limit_per_task=5)
+            formatted_new_recommended = format_dataframe(new_recommended)
+            new_markdown = format_recommended_models_as_markdown(new_recommended)
+
             return (
                 new_df,  # Update cached raw data
+                formatted_new_recommended,  # Update recommended models
+                new_markdown,  # Update markdown output
                 formatted_new_df,
                 gr.update(choices=get_unique_values(new_df, "task")),
                 gr.update(choices=get_unique_values(new_df, "platform")),
@@ -217,6 +263,8 @@ def create_leaderboard_ui():
             fn=update_data,
             outputs=[
                 raw_data_state,
+                recommended_table,
+                markdown_textbox,
                 results_table,
                 task_filter,
                 platform_filter,
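Note on the `update_data` wiring above: Gradio matches the tuple returned by the callback to the `outputs` list purely by position, which is why `formatted_new_recommended` and `new_markdown` are inserted into the return tuple at the same positions where `recommended_table` and `markdown_textbox` appear in `outputs`. A minimal standalone sketch of that contract (the component names here are invented, not the Space's actual ones):

    import gradio as gr

    def refresh():
        # Returned values are assigned to `outputs` by position:
        # index 0 -> table, index 1 -> textbox.
        return "new table value", "new markdown value"

    with gr.Blocks() as demo:
        table = gr.Textbox(label="Recommended models")
        markdown_box = gr.Textbox(label="Markdown for llms.txt")
        refresh_btn = gr.Button("Refresh")
        refresh_btn.click(fn=refresh, outputs=[table, markdown_box])

If the return order and the `outputs` order drift apart, values silently land in the wrong components, so the two lists must be kept in sync.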
leaderboard/src/leaderboard/data_loader.py
CHANGED
@@ -413,6 +413,191 @@ def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -
     return result
 
 
+def get_webgpu_beginner_friendly_models(
+    df: pd.DataFrame,
+    limit_per_task: int = 5
+) -> pd.DataFrame:
+    """Get top beginner-friendly models that are WebGPU compatible, grouped by task.
+
+    A model is included if it:
+    - Has a high first_timer_score (popular, fast to load, fast inference)
+    - Has successful WebGPU benchmark results (device=webgpu, status=completed)
+
+    Args:
+        df: DataFrame containing benchmark results
+        limit_per_task: Maximum number of models to return per task (default: 5)
+
+    Returns:
+        DataFrame with top WebGPU-compatible beginner-friendly models per task
+    """
+    if df.empty:
+        return pd.DataFrame()
+
+    # Check that the filter columns exist before indexing into them
+    if "device" not in df.columns or "status" not in df.columns:
+        logger.warning("Required columns (device, status) not found in dataframe")
+        return pd.DataFrame()
+
+    # Filter for WebGPU benchmarks that completed successfully
+    webgpu_filter = (
+        (df["device"] == "webgpu") &
+        (df["status"] == "completed")
+    )
+
+    filtered = df[webgpu_filter].copy()
+
+    if filtered.empty:
+        logger.warning("No successful WebGPU benchmarks found")
+        return pd.DataFrame()
+
+    # Check that the grouping and scoring columns exist
+    if "task" not in filtered.columns or "first_timer_score" not in filtered.columns:
+        logger.warning("Required columns (task, first_timer_score) not found in filtered dataframe")
+        return pd.DataFrame()
+
+    # Group by task and get top models
+    all_results = []
+
+    for task in filtered["task"].unique():
+        task_df = filtered[filtered["task"] == task].copy()
+
+        if task_df.empty:
+            continue
+
+        # Remove rows with NaN first_timer_score
+        task_df = task_df.dropna(subset=["first_timer_score"])
+
+        if task_df.empty:
+            continue
+
+        # For each model, keep the benchmark with the highest first_timer_score
+        idx_max_series = task_df.groupby("modelId")["first_timer_score"].idxmax()
+        valid_indices = idx_max_series.dropna()
+
+        if valid_indices.empty:
+            continue
+
+        best_per_model = task_df.loc[valid_indices]
+
+        # Sort by first_timer_score (descending) and take top N
+        top_for_task = best_per_model.sort_values(
+            "first_timer_score",
+            ascending=False
+        ).head(limit_per_task)
+
+        all_results.append(top_for_task)
+
+    if not all_results:
+        logger.warning("No models found after filtering and grouping")
+        return pd.DataFrame()
+
+    # Combine all results
+    result = pd.concat(all_results, ignore_index=True)
+
+    # Sort by task, then by first_timer_score (descending)
+    if "task" in result.columns and "first_timer_score" in result.columns:
+        result = result.sort_values(
+            ["task", "first_timer_score"],
+            ascending=[True, False]
+        )
+
+    return result
+
+
+def format_recommended_models_as_markdown(df: pd.DataFrame) -> str:
+    """Format recommended WebGPU models as markdown for llms.txt embedding.
+
+    Args:
+        df: DataFrame containing recommended models (output from get_webgpu_beginner_friendly_models)
+
+    Returns:
+        Formatted markdown string
+    """
+    if df.empty:
+        return "No recommended models available."
+
+    markdown_lines = [
+        "# Recommended Transformers.js Models (WebGPU Compatible)",
+        "",
+        "These models are optimized for beginners - popular, fast to load, and WebGPU compatible.",
+        "",
+    ]
+
+    # Group by task
+    if "task" not in df.columns:
+        return "No task information available."
+
+    for task in sorted(df["task"].unique()):
+        task_df = df[df["task"] == task].copy()
+
+        if task_df.empty:
+            continue
+
+        # Add task header
+        markdown_lines.append(f"## {task.title()}")
+        markdown_lines.append("")
+
+        # Sort by first_timer_score descending
+        if "first_timer_score" in task_df.columns:
+            task_df = task_df.sort_values("first_timer_score", ascending=False)
+
+        # Add each model
+        for _, row in task_df.iterrows():
+            model_id = row.get("modelId", "Unknown")
+            score = row.get("first_timer_score", None)
+            downloads = row.get("downloads", 0)
+            likes = row.get("likes", 0)
+            load_time = row.get("load_ms_p50", None)
+            infer_time = row.get("first_infer_ms_p50", None)
+
+            # Model entry
+            markdown_lines.append(f"### {model_id}")
+            markdown_lines.append("")
+
+            # WebGPU compatibility
+            markdown_lines.append("**WebGPU Compatible:** ✅ Yes")
+            markdown_lines.append("")
+
+            # Metrics (pd.notna also rejects None, so missing columns are skipped)
+            metrics = []
+            if pd.notna(load_time):
+                metrics.append(f"Load: {load_time:.1f}ms")
+            if pd.notna(infer_time):
+                metrics.append(f"Inference: {infer_time:.1f}ms")
+            if downloads:
+                if downloads >= 1_000_000:
+                    downloads_str = f"{downloads / 1_000_000:.1f}M"
+                elif downloads >= 1_000:
+                    downloads_str = f"{downloads / 1_000:.1f}k"
+                else:
+                    downloads_str = str(downloads)
+                metrics.append(f"Downloads: {downloads_str}")
+            if likes:
+                metrics.append(f"Likes: {likes}")
+
+            if metrics:
+                markdown_lines.append(f"**Metrics:** {' | '.join(metrics)}")
+
+            markdown_lines.append("")
+
+        markdown_lines.append("---")
+        markdown_lines.append("")
+
+    # Add footer
+    markdown_lines.extend([
+        "## About These Recommendations",
+        "",
+        "Models are selected based on:",
+        "- **Popularity**: Downloads and likes from HuggingFace Hub",
+        "- **Performance**: Fast loading and inference times",
+        "- **Compatibility**: All models have successful WebGPU benchmark results",
+        "",
+        "These models are recommended for beginners getting started with Transformers.js.",
+    ])
+
+    return "\n".join(markdown_lines)
+
+
 def get_unique_values(df: pd.DataFrame, column: str) -> List[str]:
     """Get unique values from a column for dropdown choices.
 
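For reference, a quick usage sketch of the two new helpers, assuming the `leaderboard` package is importable; the rows, model IDs, and numbers below are invented for illustration, and only the column names mirror what the code above reads:

    import pandas as pd

    from leaderboard.data_loader import (
        get_webgpu_beginner_friendly_models,
        format_recommended_models_as_markdown,
    )

    # Invented benchmark rows; column names match those used above.
    df = pd.DataFrame([
        {"modelId": "org/model-a", "task": "text-classification",
         "device": "webgpu", "status": "completed", "first_timer_score": 91.0,
         "downloads": 1_500_000, "likes": 320,
         "load_ms_p50": 420.0, "first_infer_ms_p50": 15.0},
        {"modelId": "org/model-b", "task": "text-classification",
         "device": "wasm", "status": "completed", "first_timer_score": 88.0,
         "downloads": 600_000, "likes": 150,
         "load_ms_p50": 800.0, "first_infer_ms_p50": 60.0},
    ])

    top = get_webgpu_beginner_friendly_models(df, limit_per_task=5)
    print(format_recommended_models_as_markdown(top))
    # Only org/model-a passes the device/status filter, so the markdown has a
    # single "Text-Classification" section with one model entry plus the footer.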