Spaces:
Runtime error
Runtime error
fix
Browse files
leaderboard/src/leaderboard/app.py
CHANGED
|
@@ -56,6 +56,7 @@ def filter_data(
|
|
| 56 |
device_filter: str,
|
| 57 |
mode_filter: str,
|
| 58 |
dtype_filter: str,
|
|
|
|
| 59 |
) -> pd.DataFrame:
|
| 60 |
"""Filter benchmark data based on user inputs."""
|
| 61 |
if df.empty:
|
|
@@ -89,6 +90,10 @@ def filter_data(
|
|
| 89 |
if dtype_filter and dtype_filter != "All":
|
| 90 |
filtered = filtered[filtered["dtype"] == dtype_filter]
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
return filtered
|
| 93 |
|
| 94 |
|
|
@@ -99,10 +104,9 @@ def create_leaderboard_ui():
|
|
| 99 |
df = load_data()
|
| 100 |
formatted_df = format_dataframe(df)
|
| 101 |
|
| 102 |
-
# Cache raw data in Gradio state to avoid reloading on every filter change
|
| 103 |
-
raw_data_state = gr.State(df)
|
| 104 |
-
|
| 105 |
with gr.Blocks(title="Transformers.js Benchmark Leaderboard") as demo:
|
|
|
|
|
|
|
| 106 |
gr.Markdown("# 🏆 Transformers.js Benchmark Leaderboard")
|
| 107 |
gr.Markdown(
|
| 108 |
"Compare benchmark results for different models, platforms, and configurations."
|
|
@@ -156,6 +160,11 @@ def create_leaderboard_ui():
|
|
| 156 |
choices=get_unique_values(df, "dtype"),
|
| 157 |
value="All",
|
| 158 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
results_table = gr.DataFrame(
|
| 161 |
value=formatted_df,
|
|
@@ -193,12 +202,13 @@ def create_leaderboard_ui():
|
|
| 193 |
gr.update(choices=get_unique_values(new_df, "device")),
|
| 194 |
gr.update(choices=get_unique_values(new_df, "mode")),
|
| 195 |
gr.update(choices=get_unique_values(new_df, "dtype")),
|
|
|
|
| 196 |
)
|
| 197 |
|
| 198 |
-
def apply_filters(raw_df, model, task, platform, device, mode, dtype):
|
| 199 |
"""Apply filters and return filtered DataFrame."""
|
| 200 |
# Use cached raw data instead of reloading
|
| 201 |
-
filtered = filter_data(raw_df, model, task, platform, device, mode, dtype)
|
| 202 |
return format_dataframe(filtered)
|
| 203 |
|
| 204 |
# Refresh button updates data and resets filters
|
|
@@ -212,6 +222,7 @@ def create_leaderboard_ui():
|
|
| 212 |
device_filter,
|
| 213 |
mode_filter,
|
| 214 |
dtype_filter,
|
|
|
|
| 215 |
],
|
| 216 |
)
|
| 217 |
|
|
@@ -224,6 +235,7 @@ def create_leaderboard_ui():
|
|
| 224 |
device_filter,
|
| 225 |
mode_filter,
|
| 226 |
dtype_filter,
|
|
|
|
| 227 |
]
|
| 228 |
|
| 229 |
model_filter.change(
|
|
@@ -256,6 +268,11 @@ def create_leaderboard_ui():
|
|
| 256 |
inputs=filter_inputs,
|
| 257 |
outputs=results_table,
|
| 258 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
return demo
|
| 261 |
|
|
|
|
| 56 |
device_filter: str,
|
| 57 |
mode_filter: str,
|
| 58 |
dtype_filter: str,
|
| 59 |
+
status_filter: str,
|
| 60 |
) -> pd.DataFrame:
|
| 61 |
"""Filter benchmark data based on user inputs."""
|
| 62 |
if df.empty:
|
|
|
|
| 90 |
if dtype_filter and dtype_filter != "All":
|
| 91 |
filtered = filtered[filtered["dtype"] == dtype_filter]
|
| 92 |
|
| 93 |
+
# Status filter
|
| 94 |
+
if status_filter and status_filter != "All":
|
| 95 |
+
filtered = filtered[filtered["status"] == status_filter]
|
| 96 |
+
|
| 97 |
return filtered
|
| 98 |
|
| 99 |
|
|
|
|
| 104 |
df = load_data()
|
| 105 |
formatted_df = format_dataframe(df)
|
| 106 |
|
|
|
|
|
|
|
|
|
|
| 107 |
with gr.Blocks(title="Transformers.js Benchmark Leaderboard") as demo:
|
| 108 |
+
# Cache raw data in Gradio state to avoid reloading on every filter change
|
| 109 |
+
raw_data_state = gr.State(df)
|
| 110 |
gr.Markdown("# 🏆 Transformers.js Benchmark Leaderboard")
|
| 111 |
gr.Markdown(
|
| 112 |
"Compare benchmark results for different models, platforms, and configurations."
|
|
|
|
| 160 |
choices=get_unique_values(df, "dtype"),
|
| 161 |
value="All",
|
| 162 |
)
|
| 163 |
+
status_filter = gr.Dropdown(
|
| 164 |
+
label="Status",
|
| 165 |
+
choices=get_unique_values(df, "status"),
|
| 166 |
+
value="All",
|
| 167 |
+
)
|
| 168 |
|
| 169 |
results_table = gr.DataFrame(
|
| 170 |
value=formatted_df,
|
|
|
|
| 202 |
gr.update(choices=get_unique_values(new_df, "device")),
|
| 203 |
gr.update(choices=get_unique_values(new_df, "mode")),
|
| 204 |
gr.update(choices=get_unique_values(new_df, "dtype")),
|
| 205 |
+
gr.update(choices=get_unique_values(new_df, "status")),
|
| 206 |
)
|
| 207 |
|
| 208 |
+
def apply_filters(raw_df, model, task, platform, device, mode, dtype, status):
|
| 209 |
"""Apply filters and return filtered DataFrame."""
|
| 210 |
# Use cached raw data instead of reloading
|
| 211 |
+
filtered = filter_data(raw_df, model, task, platform, device, mode, dtype, status)
|
| 212 |
return format_dataframe(filtered)
|
| 213 |
|
| 214 |
# Refresh button updates data and resets filters
|
|
|
|
| 222 |
device_filter,
|
| 223 |
mode_filter,
|
| 224 |
dtype_filter,
|
| 225 |
+
status_filter,
|
| 226 |
],
|
| 227 |
)
|
| 228 |
|
|
|
|
| 235 |
device_filter,
|
| 236 |
mode_filter,
|
| 237 |
dtype_filter,
|
| 238 |
+
status_filter,
|
| 239 |
]
|
| 240 |
|
| 241 |
model_filter.change(
|
|
|
|
| 268 |
inputs=filter_inputs,
|
| 269 |
outputs=results_table,
|
| 270 |
)
|
| 271 |
+
status_filter.change(
|
| 272 |
+
fn=apply_filters,
|
| 273 |
+
inputs=filter_inputs,
|
| 274 |
+
outputs=results_table,
|
| 275 |
+
)
|
| 276 |
|
| 277 |
return demo
|
| 278 |
|
leaderboard/src/leaderboard/data_loader.py
CHANGED
|
@@ -109,6 +109,11 @@ def flatten_result(result: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 109 |
except (ValueError, OSError):
|
| 110 |
timestamp_dt = None
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
flat = {
|
| 113 |
"id": result.get("id", ""),
|
| 114 |
"platform": result.get("platform", ""),
|
|
@@ -121,9 +126,16 @@ def flatten_result(result: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 121 |
"browser": result.get("browser", ""),
|
| 122 |
"dtype": result.get("dtype", ""),
|
| 123 |
"headed": result.get("headed", False),
|
| 124 |
-
"status": result.get("status", ""),
|
| 125 |
"timestamp": timestamp_dt,
|
| 126 |
"runtime": result.get("runtime", ""),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
}
|
| 128 |
|
| 129 |
# Extract metrics if available (already at top level)
|
|
|
|
| 109 |
except (ValueError, OSError):
|
| 110 |
timestamp_dt = None
|
| 111 |
|
| 112 |
+
# Determine actual status - if there's an error, it should be "failed"
|
| 113 |
+
status = result.get("status", "")
|
| 114 |
+
if "error" in result:
|
| 115 |
+
status = "failed"
|
| 116 |
+
|
| 117 |
flat = {
|
| 118 |
"id": result.get("id", ""),
|
| 119 |
"platform": result.get("platform", ""),
|
|
|
|
| 126 |
"browser": result.get("browser", ""),
|
| 127 |
"dtype": result.get("dtype", ""),
|
| 128 |
"headed": result.get("headed", False),
|
| 129 |
+
"status": status,
|
| 130 |
"timestamp": timestamp_dt,
|
| 131 |
"runtime": result.get("runtime", ""),
|
| 132 |
+
# Initialize metric fields with None (will be filled if metrics exist)
|
| 133 |
+
"load_ms_p50": None,
|
| 134 |
+
"load_ms_p90": None,
|
| 135 |
+
"first_infer_ms_p50": None,
|
| 136 |
+
"first_infer_ms_p90": None,
|
| 137 |
+
"subsequent_infer_ms_p50": None,
|
| 138 |
+
"subsequent_infer_ms_p90": None,
|
| 139 |
}
|
| 140 |
|
| 141 |
# Extract metrics if available (already at top level)
|