# Hosting-page residue (not part of the program), kept for provenance:
# whitphx's picture
# whitphx HF Staff
# update
# 05b75ca
"""
Transformers.js Benchmark Leaderboard
A Gradio app that displays benchmark results from a HuggingFace Dataset repository.
"""
import os
import logging
import pandas as pd
import gradio as gr
from dotenv import load_dotenv
from data_loader import (
load_benchmark_data,
get_unique_values,
get_webgpu_beginner_friendly_models,
format_recommended_models_as_markdown,
)
from formatters import apply_formatting
# Logging setup: INFO level, timestamped "time - logger - level - message" records
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)

# Load environment variables first, then read the settings this app depends on.
# Both values are None when the corresponding variable is not set.
load_dotenv()
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO")
HF_TOKEN = os.environ.get("HF_TOKEN")
def load_data() -> pd.DataFrame:
    """Return the raw benchmark data loaded via ``load_benchmark_data``.

    The dataset repository and access token are taken from the module-level
    ``HF_DATASET_REPO`` and ``HF_TOKEN`` settings.
    """
    return load_benchmark_data(dataset_repo=HF_DATASET_REPO, token=HF_TOKEN)
def format_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with every row run through ``apply_formatting`` for display.

    An empty frame is handed back as-is (the same object, no formatting pass).
    """

    def _format_row(row: pd.Series) -> pd.Series:
        # apply_formatting receives the row as a dict; its result is wrapped
        # back into a Series so DataFrame.apply can reassemble a frame.
        return pd.Series(apply_formatting(row.to_dict()))

    if not df.empty:
        return df.apply(_format_row, axis=1)
    return df
def filter_data(
    df: pd.DataFrame,
    model_filter: str,
    task_filter: str,
    platform_filter: str,
    device_filter: str,
    mode_filter: str,
    dtype_filter: str,
    status_filter: str,
) -> pd.DataFrame:
    """Filter benchmark data based on user inputs.

    Args:
        df: Benchmark results. Needs a ``modelId`` column when ``model_filter``
            is set, and the matching column (``task``, ``platform``,
            ``device``, ``mode``, ``dtype``, ``status``) for every exact-match
            filter that is active.
        model_filter: Text to look for inside ``modelId``. Matched literally
            (not as a regular expression) and case-insensitively; rows whose
            ``modelId`` is missing never match. A falsy value disables it.
        task_filter: Exact ``task`` value to keep; falsy or ``"All"`` disables it.
        platform_filter: Exact ``platform`` value to keep; falsy or ``"All"``
            disables it.
        device_filter: Exact ``device`` value to keep; falsy or ``"All"``
            disables it.
        mode_filter: Exact ``mode`` value to keep; falsy or ``"All"`` disables it.
        dtype_filter: Exact ``dtype`` value to keep; falsy or ``"All"``
            disables it.
        status_filter: Exact ``status`` value to keep; falsy or ``"All"``
            disables it.

    Returns:
        ``df`` itself when it is empty; otherwise a new DataFrame containing
        the rows that satisfy every active filter.
    """
    if df.empty:
        return df

    filtered = df.copy()

    # Model name filter. regex=False because this is user-typed text: characters
    # such as "(", "+" or "." must match literally. Compiled as a pattern, an
    # unbalanced "(" raises re.error and "." matches any character.
    if model_filter:
        filtered = filtered[
            filtered["modelId"].str.contains(
                model_filter, case=False, na=False, regex=False
            )
        ]

    # Exact-match filters, applied in a fixed order; "All" means "no restriction".
    exact_filters = {
        "task": task_filter,
        "platform": platform_filter,
        "device": device_filter,
        "mode": mode_filter,
        "dtype": dtype_filter,
        "status": status_filter,
    }
    for column, wanted in exact_filters.items():
        if wanted and wanted != "All":
            filtered = filtered[filtered[column] == wanted]

    return filtered
def create_leaderboard_ui():
    """Create the Gradio UI for the leaderboard.

    Benchmark data is loaded once while the UI is being built and cached in a
    ``gr.State``. Changing any filter re-filters that cached frame; only the
    refresh button loads the data again, and it also puts every filter back to
    its initial value so the controls always agree with the table shown.

    Returns:
        The constructed ``gr.Blocks`` app (not launched).
    """

    def _recommendations(frame):
        """Return (formatted table, llms.txt markdown) for the recommended models in ``frame``."""
        recommended = get_webgpu_beginner_friendly_models(frame, limit_per_task=5)
        return (
            format_dataframe(recommended),
            format_recommended_models_as_markdown(recommended),
        )

    # Load initial data
    df = load_data()
    formatted_df = format_dataframe(df)

    with gr.Blocks(title="Transformers.js Benchmark Leaderboard") as demo:
        # Cache raw data in Gradio state to avoid reloading on every filter change
        raw_data_state = gr.State(df)

        gr.Markdown("# 🏆 Transformers.js Benchmark Leaderboard")
        gr.Markdown(
            "Compare benchmark results for different models, platforms, and configurations."
        )

        if not HF_DATASET_REPO:
            gr.Markdown(
                "⚠️ **HF_DATASET_REPO not configured.** "
                "Please set the environment variable to load benchmark data."
            )

        gr.Markdown(
            "💡 **Tip:** Use the recommended models section below to find popular models "
            "that are fast to load and quick to run - perfect for getting started!"
        )

        # Recommended models section
        gr.Markdown("## ⭐ Recommended WebGPU Models for Beginners")
        gr.Markdown(
            "These models are selected for being:\n"
            "- **WebGPU compatible** - Work in modern browsers with GPU acceleration\n"
            "- **Beginner-friendly** - Popular, fast to load, and quick to run\n"
            "- Sorted by task type, showing top 3-5 models per task"
        )

        # Get recommended models
        formatted_recommended, markdown_output = _recommendations(df)

        recommended_table = gr.DataFrame(
            value=formatted_recommended,
            label="Top WebGPU-Compatible Models by Task",
            interactive=False,
            wrap=True,
        )

        gr.Markdown("### 📝 Markdown Output for llms.txt")
        gr.Markdown(
            "Copy the markdown below to embed in your llms.txt or documentation:"
        )
        markdown_textbox = gr.Textbox(
            value=markdown_output,
            label="Markdown for llms.txt",
            lines=20,
            max_lines=30,
            show_copy_button=True,
            interactive=False,
        )

        gr.Markdown("---")
        gr.Markdown("## 🔍 Full Benchmark Results")

        with gr.Row():
            refresh_btn = gr.Button("🔄 Refresh Data", variant="primary")

        with gr.Row():
            model_filter = gr.Textbox(
                label="Model Name",
                placeholder="Filter by model name (e.g., 'bert', 'gpt')",
            )
            task_filter = gr.Dropdown(
                label="Task",
                choices=get_unique_values(df, "task"),
                value="All",
            )

        with gr.Row():
            platform_filter = gr.Dropdown(
                label="Platform",
                choices=get_unique_values(df, "platform"),
                value="All",
            )
            device_filter = gr.Dropdown(
                label="Device",
                choices=get_unique_values(df, "device"),
                value="All",
            )

        with gr.Row():
            mode_filter = gr.Dropdown(
                label="Mode",
                choices=get_unique_values(df, "mode"),
                value="All",
            )
            dtype_filter = gr.Dropdown(
                label="DType",
                choices=get_unique_values(df, "dtype"),
                value="All",
            )
            status_filter = gr.Dropdown(
                label="Status",
                choices=get_unique_values(df, "status"),
                value="All",
            )

        results_table = gr.DataFrame(
            value=formatted_df,
            label="All Benchmark Results",
            interactive=False,
            wrap=True,
        )

        gr.Markdown("### 📊 Metrics")
        gr.Markdown(
            "**Benchmark Metrics:**\n"
            "- **load_ms**: Model loading time in milliseconds\n"
            "- **first_infer_ms**: First inference time in milliseconds\n"
            "- **subsequent_infer_ms**: Subsequent inference time in milliseconds\n"
            "- **p50/p90**: 50th and 90th percentile values\n\n"
            "**HuggingFace Metrics:**\n"
            "- **downloads**: Total downloads from HuggingFace Hub\n"
            "- **likes**: Number of likes on HuggingFace Hub\n\n"
            "**WebGPU Compatibility:**\n"
            "- Models in the recommended section are all WebGPU compatible\n"
            "- WebGPU enables GPU acceleration in modern browsers\n\n"
            "**⚠️ Important Note About Performance Metrics:**\n"
            "All metrics are measured in a controlled benchmark environment. "
            "They are useful for **comparing models against each other**, but may not reflect "
            "actual performance in your environment. Factors like hardware, browser, OS, and system load affect real-world performance. "
            "We recommend testing models in your own environment for accurate measurements."
        )

        def _reset_dropdown(frame, column):
            """Give a dropdown the choices found in ``frame`` and select "All" again."""
            return gr.update(choices=get_unique_values(frame, column), value="All")

        def update_data():
            """Reload data from HuggingFace and reset every filter.

            The returned tuple follows the order of the ``outputs`` list of
            ``refresh_btn.click`` below.
            """
            new_df = load_data()
            formatted_new_df = format_dataframe(new_df)

            # Update recommended models
            formatted_new_recommended, new_markdown = _recommendations(new_df)

            return (
                new_df,  # Update cached raw data
                formatted_new_recommended,  # Update recommended models
                new_markdown,  # Update markdown output
                formatted_new_df,  # Unfiltered table, consistent with the reset filters
                gr.update(value=""),  # Clear the model name filter
                _reset_dropdown(new_df, "task"),
                _reset_dropdown(new_df, "platform"),
                _reset_dropdown(new_df, "device"),
                _reset_dropdown(new_df, "mode"),
                _reset_dropdown(new_df, "dtype"),
                _reset_dropdown(new_df, "status"),
            )

        def apply_filters(raw_df, model, task, platform, device, mode, dtype, status):
            """Apply filters and return filtered DataFrame."""
            # Use cached raw data instead of reloading
            filtered = filter_data(raw_df, model, task, platform, device, mode, dtype, status)
            return format_dataframe(filtered)

        # Refresh button updates data and resets filters
        refresh_btn.click(
            fn=update_data,
            outputs=[
                raw_data_state,
                recommended_table,
                markdown_textbox,
                results_table,
                model_filter,
                task_filter,
                platform_filter,
                device_filter,
                mode_filter,
                dtype_filter,
                status_filter,
            ],
        )

        # Filter inputs update the table (using cached raw data). Every control
        # is wired to the same handler with the same inputs, so one loop
        # registers them all.
        filter_controls = [
            model_filter,
            task_filter,
            platform_filter,
            device_filter,
            mode_filter,
            dtype_filter,
            status_filter,
        ]
        filter_inputs = [raw_data_state, *filter_controls]
        for control in filter_controls:
            control.change(
                fn=apply_filters,
                inputs=filter_inputs,
                outputs=results_table,
            )

    return demo
# Build the app when the module is executed so that `demo` exists as a
# module-level name, then start serving it.
demo = create_leaderboard_ui()
demo.launch()