Ákos Hadnagy committed
Commit b6b18a0 · 1 Parent(s): 954d017

UI improvements

Files changed (2):
  1. app.py +197 -44
  2. scenario_mappings.json +9 -9
app.py CHANGED
@@ -31,6 +31,7 @@ class BenchmarkDashboard:
         self.reader = BenchmarkDataReader()
         self.df = None
         self.scenario_mappings = self.load_scenario_mappings()
+        self.metric_mappings = self.get_metric_mappings()
         self.load_data()
 
     def load_data(self) -> None:
@@ -72,16 +73,121 @@ class BenchmarkDashboard:
         # If not found in mappings, assume it's already a raw name
         return readable_name
 
-    def get_filter_options(self) -> Tuple[List[str], List[str], List[str], List[str], str, str]:
+    def get_metric_mappings(self) -> Dict[str, str]:
+        """Get metric name mappings from technical to human-readable names."""
+        return {
+            "tokens_per_second_mean": "Tokens per Second",
+            "latency_seconds_mean": "Latency (seconds)",
+            "time_to_first_token_seconds_mean": "Time to First Token (seconds)",
+            "time_per_output_token_seconds_mean": "Time per Output Token (seconds)"
+        }
+
+    def get_readable_metric_name(self, metric_name: str) -> str:
+        """Get human-readable metric name or return original if not mapped."""
+        return self.metric_mappings.get(metric_name, metric_name)
+
+    def get_raw_metric_name(self, readable_name: str) -> str:
+        """Convert human-readable metric name back to raw metric name."""
+        for raw_name, mapped_name in self.metric_mappings.items():
+            if mapped_name == readable_name:
+                return raw_name
+        return readable_name
+
+    def get_best_scenario_for_model(self, model_name: str, metric: str = "tokens_per_second_mean") -> str:
+        """Get the best performing scenario for a given model."""
+        if self.df_pandas.empty:
+            return ""
+
+        # Filter data for this model
+        model_data = self.df_pandas[self.df_pandas['model_name'] == model_name]
+        if model_data.empty:
+            return ""
+
+        # Define priority order for scenarios (preference for kernelized/compiled)
+        priority_order = [
+            "eager_sdpa_flash_attention",
+            "eager_sdpa_efficient_attention",
+            "compiled_compile_max-autotune_sdpa_efficient_attention",
+            "compiled_compile_max-autotune_sdpa_default",
+            "compiled_compile_max-autotune_sdpa_math",
+            "compiled_compile_max-autotune_eager_attn",
+            "eager_sdpa_default",
+            "eager_sdpa_math",
+            "eager_eager_attn"
+        ]
+
+        # Check if metric exists
+        if metric not in model_data.columns:
+            # Fallback to first available scenario in priority order
+            for scenario in priority_order:
+                if scenario in model_data['scenario_name'].values:
+                    return self.get_readable_scenario_name(scenario)
+            return self.get_readable_scenario_name(model_data['scenario_name'].iloc[0])
+
+        # Find best performing scenario (highest value for throughput metrics, lowest for latency)
+        is_latency_metric = 'latency' in metric.lower() or 'time' in metric.lower()
+
+        if is_latency_metric:
+            best_row = model_data.loc[model_data[metric].idxmin()]
+        else:
+            best_row = model_data.loc[model_data[metric].idxmax()]
+
+        return self.get_readable_scenario_name(best_row['scenario_name'])
+
+    def get_organized_scenarios(self, available_raw_scenarios: List[str]) -> Tuple[List[str], List[str]]:
+        """Organize scenarios into priority groups with separators."""
+        # Define priority scenarios (main recommended scenarios)
+        priority_raw_scenarios = [
+            "eager_sdpa_flash_attention",
+            "compiled_compile_max-autotune_sdpa_default"
+        ]
+
+        # Define expert/advanced scenarios (including efficient attention)
+        expert_raw_scenarios = [
+            "eager_sdpa_efficient_attention",
+            "compiled_compile_max-autotune_sdpa_efficient_attention",
+            "compiled_compile_max-autotune_eager_attn",
+            "compiled_compile_max-autotune_sdpa_math",
+            "eager_sdpa_default",
+            "eager_eager_attn",
+            "eager_sdpa_math"
+        ]
+
+        # Get available scenarios in priority order
+        priority_scenarios = []
+        expert_scenarios = []
+
+        # Add priority scenarios that are available
+        for raw_scenario in priority_raw_scenarios:
+            if raw_scenario in available_raw_scenarios:
+                readable_name = self.get_readable_scenario_name(raw_scenario)
+                priority_scenarios.append(readable_name)
+
+        # Add expert scenarios that are available
+        for raw_scenario in expert_raw_scenarios:
+            if raw_scenario in available_raw_scenarios:
+                readable_name = self.get_readable_scenario_name(raw_scenario)
+                expert_scenarios.append(readable_name)
+
+        # Combine with separator
+        all_scenarios = priority_scenarios.copy()
+        if expert_scenarios:
+            all_scenarios.append("─── Advanced/Developer Options ───")
+            all_scenarios.extend(expert_scenarios)
+
+        # Return all scenarios (no default selections for multi-select anymore)
+        return all_scenarios, []
+
+    def get_filter_options(self) -> Tuple[List[str], List[str], List[str], List[str], List[str], str, str]:
         """Get unique values for filter dropdowns and date range."""
         if self.df_pandas.empty:
-            return [], [], [], [], "", ""
+            return [], [], [], [], [], "", ""
 
         models = sorted(self.df_pandas['model_name'].dropna().unique().tolist())
 
-        # Get scenarios with human-readable names for display
+        # Get organized scenarios with priority ordering and default selections
         raw_scenarios = sorted(self.df_pandas['scenario_name'].dropna().unique().tolist())
-        scenarios = [self.get_readable_scenario_name(scenario) for scenario in raw_scenarios]
+        scenarios, default_scenarios = self.get_organized_scenarios(raw_scenarios)
 
         gpus = sorted(self.df_pandas['gpu_name'].dropna().unique().tolist())
 
@@ -122,9 +228,9 @@ class BenchmarkDashboard:
         min_date = self.df_pandas['timestamp'].min().strftime('%Y-%m-%d')
         max_date = self.df_pandas['timestamp'].max().strftime('%Y-%m-%d')
 
-        return models, scenarios, gpus, benchmark_runs, min_date, max_date
+        return models, scenarios, gpus, benchmark_runs, default_scenarios, min_date, max_date
 
-    def filter_data(self, selected_models: List[str], selected_scenarios: List[str],
+    def filter_data(self, selected_model: str, selected_scenarios: List[str],
                     selected_gpus: List[str], selected_run: str = None,
                     start_date: str = None, end_date: str = None) -> pd.DataFrame:
         """Filter data based on user selections."""
@@ -133,11 +239,12 @@ class BenchmarkDashboard:
 
         filtered_df = self.df_pandas.copy()
 
-        if selected_models:
-            filtered_df = filtered_df[filtered_df['model_name'].isin(selected_models)]
+        if selected_model:
+            filtered_df = filtered_df[filtered_df['model_name'] == selected_model]
         if selected_scenarios:
-            # Convert human-readable scenario names back to raw names for filtering
-            raw_scenarios = [self.get_raw_scenario_name(scenario) for scenario in selected_scenarios]
+            # Filter out separator lines and convert human-readable scenario names back to raw names for filtering
+            valid_scenarios = [scenario for scenario in selected_scenarios if not scenario.startswith("───")]
+            raw_scenarios = [self.get_raw_scenario_name(scenario) for scenario in valid_scenarios]
             filtered_df = filtered_df[filtered_df['scenario_name'].isin(raw_scenarios)]
         if selected_gpus:
             filtered_df = filtered_df[filtered_df['gpu_name'].isin(selected_gpus)]
@@ -201,9 +308,9 @@ class BenchmarkDashboard:
             x='scenario_display',
             y=metric,
             color='model_name',
-            title=f'Performance Comparison: {metric.replace("_", " ").title()}',
+            title=f'Performance Comparison: {self.get_readable_metric_name(metric)}',
             labels={
-                metric: metric.replace("_", " ").title(),
+                metric: self.get_readable_metric_name(metric),
                 'scenario_display': 'Benchmark Scenario',
                 'model_name': 'Model'
            },
@@ -255,7 +362,7 @@ class BenchmarkDashboard:
                 hovertemplate=f'<b>{model}</b><br>' +
                               f'Scenario: {readable_scenario}<br>' +
                               'Time: %{x}<br>' +
-                              f'{metric.replace("_", " ").title()}: %{{y}}<br>' +
+                              f'{self.get_readable_metric_name(metric)}: %{{y}}<br>' +
                               '<extra></extra>'
             ))
 
@@ -269,9 +376,9 @@ class BenchmarkDashboard:
         )
 
         fig.update_layout(
-            title=f'Historical Trends Across Benchmark Runs: {metric.replace("_", " ").title()}',
+            title=f'Historical Trends Across Benchmark Runs: {self.get_readable_metric_name(metric)}',
             xaxis_title='Timestamp',
-            yaxis_title=metric.replace("_", " ").title(),
+            yaxis_title=self.get_readable_metric_name(metric),
             height=500,
             hovermode='closest',
             showlegend=True,
@@ -325,7 +432,7 @@ class BenchmarkDashboard:
         return fig
 
     def create_metrics_summary_table(self, filtered_df: pd.DataFrame) -> pd.DataFrame:
-        """Create summary statistics table."""
+        """Create summary statistics table with each scenario as a separate row."""
        if filtered_df.empty:
             return pd.DataFrame({'Message': ['No data available for selected filters']})
 
@@ -336,24 +443,34 @@ class BenchmarkDashboard:
         ]
 
         summary_data = []
-        for model in filtered_df['model_name'].unique():
-            model_data = filtered_df[filtered_df['model_name'] == model]
 
-            row = {'Model': model, 'Scenarios': len(model_data)}
+        # Group by scenario instead of model (since we're now single-model focused)
+        for scenario in filtered_df['scenario_name'].unique():
+            scenario_data = filtered_df[filtered_df['scenario_name'] == scenario]
+
+            # Get human-readable scenario name
+            readable_scenario = self.get_readable_scenario_name(scenario)
+
+            row = {'Scenario': readable_scenario}
+
+            # Add metrics for this scenario
             for metric in metrics_cols:
-                if metric in model_data.columns:
-                    row[f'{metric.replace("_", " ").title()} (Avg)'] = f"{model_data[metric].mean():.2f}"
-                    row[f'{metric.replace("_", " ").title()} (Best)'] = f"{model_data[metric].min() if 'latency' in metric or 'time' in metric else model_data[metric].max():.2f}"
+                if metric in scenario_data.columns and not scenario_data[metric].isna().all():
+                    readable_metric = self.get_readable_metric_name(metric)
+
+                    # For scenarios, show the mean value (since each scenario should have one value per run)
+                    mean_value = scenario_data[metric].mean()
+                    row[readable_metric] = f"{mean_value:.2f}"
 
             summary_data.append(row)
 
         return pd.DataFrame(summary_data)
 
-    def update_dashboard(self, selected_models: List[str], selected_scenarios: List[str],
+    def update_dashboard(self, selected_model: str, selected_scenarios: List[str],
                          selected_gpus: List[str], selected_run: str, metric: str):
         """Update all dashboard components based on current filters."""
         filtered_df = self.filter_data(
-            selected_models, selected_scenarios, selected_gpus, selected_run
+            selected_model, selected_scenarios, selected_gpus, selected_run
         )
 
         # Create charts
@@ -363,23 +480,35 @@ class BenchmarkDashboard:
 
         # Summary stats
         if not filtered_df.empty:
+            model_name = filtered_df['model_name'].iloc[0]
+
+            # Get list of scenario names (raw) and convert to readable names
+            raw_scenario_names = sorted(filtered_df['scenario_name'].unique())
+            readable_scenario_names = [self.get_readable_scenario_name(scenario) for scenario in raw_scenario_names]
+            scenarios_list = ", ".join(readable_scenario_names)
+
+            date_range = f"{filtered_df['timestamp'].min().strftime('%Y-%m-%d')} to {filtered_df['timestamp'].max().strftime('%Y-%m-%d')}"
+            benchmark_runs = len(filtered_df.groupby(['timestamp', 'file_path']))
+
             summary_text = f"""
-            **Data Summary:**
-            - Total Scenarios: {len(filtered_df)}
-            - Models: {', '.join(filtered_df['model_name'].unique())}
-            - Date Range: {filtered_df['timestamp'].min().strftime('%Y-%m-%d')} to {filtered_df['timestamp'].max().strftime('%Y-%m-%d')}
-            - Benchmark Runs: {len(filtered_df.groupby(['timestamp', 'file_path']))}
+            **Analysis Summary for {model_name}:**
+            - Date Range: {date_range}
+            - Benchmark Runs: {benchmark_runs}
+            - Total Data Points: {len(filtered_df)}
+
+            **Selected Scenarios:**
+            {scenarios_list}
             """
         else:
             summary_text = "No data available for current selection."
 
         return perf_chart, gpu_chart, summary_table, summary_text
 
-    def update_historical_trends(self, selected_models: List[str], selected_scenarios: List[str],
+    def update_historical_trends(self, selected_model: str, selected_scenarios: List[str],
                                  selected_gpus: List[str], start_date: str, end_date: str, metric: str):
         """Update historical trends chart with date filtering."""
         filtered_df = self.filter_data(
-            selected_models, selected_scenarios, selected_gpus,
+            selected_model, selected_scenarios, selected_gpus,
             start_date=start_date, end_date=end_date
         )
         trend_chart = self.create_historical_trend_chart(filtered_df, metric)
@@ -389,34 +518,38 @@ def create_gradio_interface() -> gr.Interface:
 def create_gradio_interface() -> gr.Interface:
     """Create the Gradio interface."""
     dashboard = BenchmarkDashboard()
-    models, scenarios, gpus, benchmark_runs, min_date, max_date = dashboard.get_filter_options()
+    models, scenarios, gpus, benchmark_runs, default_scenarios, min_date, max_date = dashboard.get_filter_options()
 
-    # Performance metrics options
-    metric_options = [
+    # Performance metrics options (human-readable)
+    raw_metric_options = [
        "tokens_per_second_mean",
        "latency_seconds_mean",
        "time_to_first_token_seconds_mean",
        "time_per_output_token_seconds_mean"
    ]
+    metric_options = [dashboard.get_readable_metric_name(metric) for metric in raw_metric_options]
 
     with gr.Blocks(title="LLM Inference Performance Dashboard", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🚀 LLM Inference Performance Dashboard")
         gr.Markdown("Analyze and compare LLM inference performance across models, scenarios, and hardware configurations.")
+        gr.Markdown("*💡 **Smart Defaults**: The best performing scenario is automatically selected for each model based on throughput analysis.*")
 
         with gr.Row():
             with gr.Column(scale=1):
                 gr.Markdown("## Filters")
 
-                model_filter = gr.CheckboxGroup(
+                model_filter = gr.Dropdown(
                     choices=models,
-                    value=models,
-                    label="Select Models",
+                    value=models[0] if models else None,
+                    label="Select Model",
                     interactive=True
                 )
-                scenario_filter = gr.CheckboxGroup(
+                scenario_filter = gr.Dropdown(
                     choices=scenarios,
-                    value=scenarios[:5] if len(scenarios) > 5 else scenarios,  # Limit initial selection
+                    value=[dashboard.get_best_scenario_for_model(models[0], "tokens_per_second_mean")] if models else [],
                     label="Select Scenarios",
+                    info="💡 The best performing scenario is automatically selected when you change models",
+                    multiselect=True,
                     interactive=True
                 )
                 gpu_filter = gr.CheckboxGroup(
@@ -427,7 +560,7 @@ def create_gradio_interface() -> gr.Interface:
                 )
                 metric_selector = gr.Dropdown(
                     choices=metric_options,
-                    value="tokens_per_second_mean",
+                    value=dashboard.get_readable_metric_name("tokens_per_second_mean"),
                     label="Primary Metric",
                     interactive=True
                 )
@@ -494,16 +627,29 @@ def create_gradio_interface() -> gr.Interface:
             filtered_runs = [run for run in benchmark_runs if search_text.lower() in run.lower()]
             return gr.Dropdown(choices=filtered_runs, value=filtered_runs[0] if filtered_runs else None)
 
+        # Function to update scenarios when model changes
+        def update_scenarios_for_model(selected_model, current_metric):
+            if not selected_model:
+                return []
+            # Convert readable metric name back to raw name
+            raw_metric = dashboard.get_raw_metric_name(current_metric)
+            best_scenario = dashboard.get_best_scenario_for_model(selected_model, raw_metric)
+            return [best_scenario] if best_scenario else []
+
         # Update function for main dashboard (excluding historical trends)
-        def update_main(models_selected, scenarios_selected, gpus_selected, run_selected, metric):
+        def update_main(model_selected, scenarios_selected, gpus_selected, run_selected, metric):
+            # Convert readable metric name back to raw name
+            raw_metric = dashboard.get_raw_metric_name(metric)
             return dashboard.update_dashboard(
-                models_selected, scenarios_selected, gpus_selected, run_selected, metric
+                model_selected, scenarios_selected, gpus_selected, run_selected, raw_metric
             )
 
         # Update function for historical trends
-        def update_trends(models_selected, scenarios_selected, gpus_selected, start_dt, end_dt, metric):
+        def update_trends(model_selected, scenarios_selected, gpus_selected, start_dt, end_dt, metric):
+            # Convert readable metric name back to raw name
+            raw_metric = dashboard.get_raw_metric_name(metric)
             return dashboard.update_historical_trends(
-                models_selected, scenarios_selected, gpus_selected, start_dt, end_dt, metric
+                model_selected, scenarios_selected, gpus_selected, start_dt, end_dt, raw_metric
             )
 
         # Set up interactivity for main dashboard
@@ -525,6 +671,13 @@ def create_gradio_interface() -> gr.Interface:
         # Connect search field to filter benchmark runs
        run_search.change(fn=filter_benchmark_runs, inputs=[run_search], outputs=[benchmark_run_selector])
 
+        # Auto-update scenarios when model changes
+        model_filter.change(
+            fn=update_scenarios_for_model,
+            inputs=[model_filter, metric_selector],
+            outputs=[scenario_filter]
+        )
+
         # Initial load
         demo.load(fn=update_main, inputs=main_inputs, outputs=main_outputs)
         demo.load(fn=update_trends, inputs=trends_inputs, outputs=trends_outputs)
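
The dropdowns now display human-readable metric labels while the charts and filters keep operating on the raw column names, so each callback converts the selected label back with `get_raw_metric_name` before calling into the dashboard. Below is a minimal standalone sketch of that round trip; the dict and helper functions only mirror `get_metric_mappings`, `get_readable_metric_name`, and `get_raw_metric_name` from this commit and are re-declared here purely for illustration.

```python
# Illustration only: mirrors the metric-name mapping introduced in this commit.
metric_mappings = {
    "tokens_per_second_mean": "Tokens per Second",
    "latency_seconds_mean": "Latency (seconds)",
    "time_to_first_token_seconds_mean": "Time to First Token (seconds)",
    "time_per_output_token_seconds_mean": "Time per Output Token (seconds)",
}

def readable(metric_name: str) -> str:
    # Fall back to the raw name when no mapping exists (same behavior as get_readable_metric_name)
    return metric_mappings.get(metric_name, metric_name)

def raw(readable_name: str) -> str:
    # Reverse lookup; unknown labels pass through unchanged (same behavior as get_raw_metric_name)
    for raw_name, mapped_name in metric_mappings.items():
        if mapped_name == readable_name:
            return raw_name
    return readable_name

# Every raw metric name survives the display round trip
for name in metric_mappings:
    assert raw(readable(name)) == name
```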
scenario_mappings.json CHANGED
@@ -1,11 +1,11 @@
 {
-    "eager_eager_attn": "Eager Execution + Eager Attention",
-    "eager_sdpa_default": "Eager Execution + SDPA Default",
-    "eager_sdpa_math": "Eager Execution + SDPA Math Backend",
-    "eager_sdpa_flash_attention": "Eager Execution + SDPA Flash Attention",
-    "eager_sdpa_efficient_attention": "Eager Execution + SDPA Efficient Attention",
-    "compiled_compile_max-autotune_eager_attn": "Compiled (Max-Autotune) + Eager Attention",
-    "compiled_compile_max-autotune_sdpa_default": "Compiled (Max-Autotune) + SDPA Default",
-    "compiled_compile_max-autotune_sdpa_math": "Compiled (Max-Autotune) + SDPA Math Backend",
-    "compiled_compile_max-autotune_sdpa_efficient_attention": "Compiled (Max-Autotune) + SDPA Efficient Attention"
+    "eager_sdpa_flash_attention": "Flash Attention",
+    "compiled_compile_max-autotune_sdpa_default": "Compiled + SDPA Default",
+    "eager_sdpa_default": "SDPA Default",
+    "eager_eager_attn": "Eager Attention",
+    "eager_sdpa_math": "SDPA Math Backend",
+    "eager_sdpa_efficient_attention": "Efficient Attention",
+    "compiled_compile_max-autotune_sdpa_efficient_attention": "Compiled + Efficient Attention",
+    "compiled_compile_max-autotune_eager_attn": "Compiled + Eager Attention",
+    "compiled_compile_max-autotune_sdpa_math": "Compiled + SDPA Math Backend"
 }
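
The mapping file now carries shorter display labels, listed with the two recommended scenarios first. A small sketch of how those labels are meant to be consumed, reproducing the "───" separator filtering that `filter_data` applies and the reverse lookup that `get_raw_scenario_name` performs; the loader and function below are illustrative stand-ins and assume the JSON file sits next to the script, which is how the repo is laid out.

```python
import json

# Illustration only: load the simplified labels and map a UI selection back to raw scenario names.
with open("scenario_mappings.json") as f:
    scenario_mappings = json.load(f)

def to_raw(readable_name: str) -> str:
    # Reverse lookup; unmapped values are assumed to already be raw scenario names
    for raw_name, label in scenario_mappings.items():
        if label == readable_name:
            return raw_name
    return readable_name

selected = ["Flash Attention", "─── Advanced/Developer Options ───", "Compiled + SDPA Default"]
valid = [s for s in selected if not s.startswith("───")]  # drop the group separator entry
print([to_raw(s) for s in valid])
# ['eager_sdpa_flash_attention', 'compiled_compile_max-autotune_sdpa_default']
```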