AppleSwing committed on
Commit
3862c96
·
verified ·
1 Parent(s): 96415eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -393
app.py CHANGED
@@ -7,7 +7,7 @@ os.environ["GRADIO_LANGUAGE"] = "en"
7
 
8
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
9
  if not RESULT_DIR:
10
- # For testing purposes, you can uncomment the line below to set a dummy dir or keep the raise
11
  # RESULT_DIR = "generic_result_dir"
12
  raise RuntimeError(
13
  "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
@@ -33,13 +33,6 @@ def normalize(val, vmin, vmax, baseline=20):
33
  return baseline + (val - vmin) / (vmax - vmin) * (100 - baseline)
34
 
35
 
36
- def normalize_reversed(val, vmin, vmax, baseline=20):
37
- """Normalize value (reversed - lower is better) to baseline-100 range."""
38
- if vmax == vmin:
39
- return baseline + 40
40
- return baseline + (vmax - val) / (vmax - vmin) * (100 - baseline)
41
-
42
-
43
  def normalize_cost(val, max_tick, baseline=20):
44
  """Normalize cost (lower is better)."""
45
  if max_tick == 0:
@@ -50,48 +43,34 @@ def normalize_cost(val, max_tick, baseline=20):
50
  def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
51
  """Generate a CAP radar plot from selected rows."""
52
 
53
- # Standard layout settings for consistent sizing
54
  layout_settings = dict(
55
- height=750, # Taller height
56
- autosize=True, # Auto width
57
- margin=dict(t=80, b=100, l=80, r=80), # Balanced margins
58
  paper_bgcolor='white',
59
  plot_bgcolor='white',
60
  )
61
 
62
- # Validation: max 3 rows
63
  if not selected_rows_data or len(selected_rows_data) == 0:
64
  fig = go.Figure()
65
  fig.add_annotation(
66
  text="Please select 1-3 rows from the table to generate radar plot",
67
- xref="paper", yref="paper",
68
- x=0.5, y=0.5, showarrow=False,
69
- font=dict(size=16),
70
- xanchor='center',
71
- yanchor='middle'
72
- )
73
- fig.update_layout(
74
- xaxis=dict(visible=False),
75
- yaxis=dict(visible=False),
76
- **layout_settings
77
  )
 
78
  return fig
79
 
80
  if len(selected_rows_data) > 3:
81
  fig = go.Figure()
82
  fig.add_annotation(
83
  text="Error: Please select no more than 3 rows!",
84
- xref="paper", yref="paper",
85
- x=0.5, y=0.5, showarrow=False,
86
  font=dict(size=18, color="red"),
87
- xanchor='center',
88
- yanchor='middle'
89
- )
90
- fig.update_layout(
91
- xaxis=dict(visible=False),
92
- yaxis=dict(visible=False),
93
- **layout_settings
94
  )
 
95
  return fig
96
 
97
  datasets = [row.get('Dataset', '') for row in selected_rows_data]
@@ -100,25 +79,17 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
100
  fig = go.Figure()
101
  fig.add_annotation(
102
  text="Error: Please select rows from the same dataset!",
103
- xref="paper", yref="paper",
104
- x=0.5, y=0.5, showarrow=False,
105
  font=dict(size=18, color="red"),
106
- xanchor='center',
107
- yanchor='middle'
108
- )
109
- fig.update_layout(
110
- xaxis=dict(visible=False),
111
- yaxis=dict(visible=False),
112
- **layout_settings
113
  )
 
114
  return fig
115
 
116
  dataset_name = datasets[0] if datasets else "Unknown"
117
 
118
- # Extract metrics from selected rows
119
  data = {}
120
  for row in selected_rows_data:
121
- # Extract model name from HTML or use as-is
122
  model_name = row.get('Model', 'Unknown')
123
  if isinstance(model_name, str) and 'href' in model_name:
124
  try:
@@ -126,23 +97,19 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
126
  except:
127
  pass
128
 
129
- # Format legend name: extract name after "/" and add method
130
  method = row.get('Method', '')
131
  if isinstance(model_name, str) and '/' in model_name:
132
- legend_name = model_name.split('/')[-1] # Get part after last /
133
  else:
134
  legend_name = str(model_name)
135
 
136
- # Add method suffix
137
  if method and method not in ['Unknown', '-', '']:
138
  legend_name = f"{legend_name}-{method}"
139
 
140
- # Get metrics
141
  acc = row.get('Accuracy(%)', 0)
142
  cost = row.get('Cost($)', 0)
143
  throughput = row.get('Decoding T/s', 0)
144
 
145
- # Convert to float if needed
146
  try:
147
  acc = float(acc) if acc not in [None, '-', ''] else 0
148
  cost = float(cost) if cost not in [None, '-', ''] else 0
@@ -151,12 +118,11 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
151
  acc, cost, throughput = 0, 0, 0
152
 
153
  data[legend_name] = {
154
- 'accuracy': acc / 100.0 if acc > 1 else acc, # Normalize to 0-1
155
  'cost': cost,
156
  'throughput': throughput
157
  }
158
 
159
- # Get min/max for normalization
160
  throughputs = [v['throughput'] for v in data.values()]
161
  costs = [v['cost'] for v in data.values()]
162
  accs = [v['accuracy'] for v in data.values()]
@@ -177,7 +143,7 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
177
  normalize_cost(values['cost'], cost_max, baseline),
178
  normalize(values['accuracy'], acc_min, acc_max, baseline)
179
  ]
180
- norm_vals += [norm_vals[0]] # Close the loop
181
 
182
  hovertext = [
183
  f"Throughput: {raw_vals[0]:.2f} T/s",
@@ -197,32 +163,13 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
197
  ))
198
 
199
  fig.update_layout(
200
- title=dict(
201
- text=f"CAP Radar Plot: {dataset_name}",
202
- x=0.5,
203
- xanchor='center',
204
- font=dict(size=20)
205
- ),
206
  polar=dict(
207
- radialaxis=dict(
208
- visible=True,
209
- range=[0, 100],
210
- tickfont=dict(size=12)
211
- ),
212
- angularaxis=dict(
213
- tickfont=dict(size=14),
214
- rotation=90, # Rotate so top is 12 o'clock
215
- direction='clockwise'
216
- ),
217
- ),
218
- legend=dict(
219
- orientation='h',
220
- yanchor='bottom',
221
- y=-0.15,
222
- xanchor='center',
223
- x=0.5,
224
- font=dict(size=13)
225
  ),
 
226
  **layout_settings
227
  )
228
 
@@ -235,11 +182,9 @@ def json_to_row(path: str, metrics: dict) -> dict:
235
  model_name = "unknown-model"
236
 
237
  dataset = metrics.get("dataset", "Unknown")
238
-
239
  method = metrics.get("method", "Unknown")
240
  precision = metrics.get("precision", "Unknown")
241
  model_type = metrics.get("model_type", "Unknown")
242
-
243
  e2e_s = metrics.get("e2e_s", None)
244
  batch_size = metrics.get("batch_size", None)
245
  gpu_type = metrics.get("gpu_type", "")
@@ -258,7 +203,7 @@ def json_to_row(path: str, metrics: dict) -> dict:
258
 
259
  if isinstance(model_name, str) and "/" in model_name:
260
  hf_url = f"https://huggingface.co/{model_name}"
261
- model_cell = f"<a href='{hf_url}' target='_blank'>{model_name}</a>"
262
  else:
263
  model_cell = model_name
264
 
@@ -285,421 +230,230 @@ def json_to_row(path: str, metrics: dict) -> dict:
285
  return row
286
 
287
 
288
- def load_from_dir(
289
- dir_path: str,
290
- selected_tasks: List[str] | None = None,
291
- selected_frameworks: List[str] | None = None,
292
- selected_model_types: List[str] | None = None,
293
- selected_precisions: List[str] | None = None,
294
- search_keyword: str = "",
295
- force_refresh: bool = False,
296
- ):
297
  try:
298
  pattern = f"hf://datasets/{dir_path}/**/*.json"
299
  dl_mode = "force_redownload" if force_refresh else None
300
-
301
  print(f"Fetching from {pattern} (mode={dl_mode})...")
302
- ds = load_dataset(
303
- "json",
304
- data_files={"train": pattern},
305
- split="train",
306
- download_mode=dl_mode,
307
- )
308
- except Exception as e:
309
- empty_html = "<p>No files loaded or Dataset not found.</p>"
310
- return empty_html, []
311
 
312
  rows = []
313
  for i, example in enumerate(ds):
314
- if isinstance(example, dict):
315
- metrics = example.get("metrics") or example.get("json") or example
316
- else:
317
- metrics = example
318
  rows.append(json_to_row(f"{dir_path}#{i}", metrics))
319
 
320
  if not rows:
321
- empty_html = "<p>No records found.</p>"
322
- return empty_html, []
323
 
324
  df = pd.DataFrame(rows)
325
 
326
- # Filters
327
- if selected_tasks is not None:
328
- lower_selected = [x.lower() for x in selected_tasks]
329
- df = df[df["Dataset"].astype(str).str.lower().isin(lower_selected)]
330
-
331
- if selected_frameworks is not None:
332
- lower_selected = [str(x).lower() for x in selected_frameworks]
333
- df = df[df["Method"].astype(str).str.lower().isin(lower_selected)]
334
-
335
- if selected_model_types is not None:
336
- lower_selected = [str(x).lower() for x in selected_model_types]
337
- df = df[df["Model type"].astype(str).str.lower().isin(lower_selected)]
338
-
339
- if selected_precisions is not None:
340
- lower_selected = [str(x).lower() for x in selected_precisions]
341
- df = df[df["Precision"].astype(str).str.lower().isin(lower_selected)]
342
-
343
  if search_keyword and search_keyword.strip():
344
- keyword_lower = search_keyword.strip().lower()
345
- mask = df.astype(str).apply(lambda row: row.str.lower().str.contains(keyword_lower).any(), axis=1)
346
- df = df[mask]
347
 
348
  if df.empty:
349
- empty_html = "<p>No records found.</p>"
350
- return empty_html, []
351
 
352
  df = df.fillna("-")
353
-
354
- # Insert row number column at the beginning
355
  df.insert(0, 'Row #', range(len(df)))
356
 
357
- # Create HTML table
358
  table_html = f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
359
  df_without_rownum = df.drop('Row #', axis=1)
360
- df_dict = df_without_rownum.to_dict('records')
361
- return table_html, df_dict
362
-
363
-
364
- def auto_refresh_from_dir(
365
- dir_path: str,
366
- selected_tasks: List[str] | None = None,
367
- selected_frameworks: List[str] | None = None,
368
- selected_model_types: List[str] | None = None,
369
- selected_precisions: List[str] | None = None,
370
- search_keyword: str = "",
371
- ):
372
- return load_from_dir(
373
- dir_path,
374
- selected_tasks=selected_tasks,
375
- selected_frameworks=selected_frameworks,
376
- selected_model_types=selected_model_types,
377
- selected_precisions=selected_precisions,
378
- search_keyword=search_keyword,
379
- force_refresh=True,
380
- )
381
 
 
 
382
 
383
- def parse_and_generate_plot(df_data: list, indices_str: str):
384
- """Parse comma-separated indices and generate radar plot."""
385
  if not indices_str or not indices_str.strip():
386
  return generate_radar_plot([])
387
-
388
  try:
389
- # Parse comma-separated indices
390
- indices = [int(idx.strip()) for idx in indices_str.split(',') if idx.strip()]
391
- # Limit to 3 rows
392
- indices = indices[:3]
393
- # Get selected rows
394
  selected_rows = [df_data[i] for i in indices if 0 <= i < len(df_data)]
395
  return generate_radar_plot(selected_rows)
396
- except (ValueError, IndexError):
397
  return generate_radar_plot([])
398
 
399
 
400
- # Gradio UI
401
-
402
  def build_app() -> gr.Blocks:
 
403
  row_css = """
404
- /* ============================================================ */
405
- /* GLOBAL RESET: Force Light Mode Colors Everywhere */
406
- /* ============================================================ */
407
-
408
- /* 1. Main Backgrounds */
409
- body, .gradio-container {
410
- background-color: #f5f7fa !important;
411
- }
412
-
413
- /* 2. Force ALL Text to be Dark */
414
- .gradio-container,
415
- .gradio-container p,
416
- .gradio-container h1, .gradio-container h2, .gradio-container h3,
417
- .gradio-container span,
418
- .gradio-container label,
419
- .gradio-container div,
420
- .gradio-container .prose,
421
- .gradio-container .prose * {
422
- color: #24292e !important;
423
  }
424
 
425
- /* 3. Force Internal Blocks & Forms to be White (Fixes black anchors/backgrounds) */
426
  .gradio-container .block,
427
- .gradio-container .form,
428
- .gradio-container .wrap,
429
- .gradio-container .row,
430
- .gradio-container .column {
431
- background-color: transparent !important;
432
- }
433
-
434
- /* 4. Fix specific Markdown Containers */
435
- .filter-section .prose,
436
- .gradio-container .prose {
437
- background-color: transparent !important;
438
- }
439
-
440
- /* 5. Inputs (Search box) */
441
- .gradio-container input,
442
- .gradio-container textarea,
443
- .gradio-container select {
444
- background-color: #ffffff !important;
445
- color: #24292e !important;
446
  border-color: #e1e4e8 !important;
447
  }
448
 
449
- /* 6. Fix Checkbox Groups */
450
- .gradio-container .wrap.default,
451
- .gradio-container .block {
452
- background-color: transparent !important;
453
- }
454
-
455
- /* The individual checkboxes */
456
- .gradio-container label.svelte-1b8605,
457
- .gradio-container label {
458
- background-color: white !important;
459
- border-color: #e1e4e8 !important;
460
  }
461
 
462
- /* Force checkbox text to be visible */
463
- .gradio-container label span {
 
464
  color: #24292e !important;
465
  }
466
 
467
- /* ============================================================ */
468
- /* COMPONENT SPECIFIC STYLING */
469
- /* ============================================================ */
470
-
471
- /* Search Box Container */
472
- .search-box {
473
- background: white !important; padding: 16px !important;
474
- border-radius: 6px; border: 2px solid #e1e4e8 !important;
475
- box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06); margin-bottom: 16px;
476
  }
477
 
478
- /* Filter Section & Accordion Container */
479
- .filter-section, .gradio-container .accordion {
480
- background: white !important;
481
- border: 2px solid #e1e4e8 !important;
482
- border-radius: 6px !important;
483
- box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
484
  }
485
 
486
- /* Fix the "Anchor" backgrounds (Markdown Headers inside Groups) */
487
- .filter-section h3, .gradio-container h3 {
488
- background-color: white !important;
489
- margin-top: 0 !important;
490
- padding-top: 5px;
491
- }
492
-
493
- /* Accordion Header - Fix for Day/Night button colors */
494
- .gradio-container .accordion button,
495
- .gradio-container .accordion span {
496
- background-color: white !important;
497
  color: #24292e !important;
498
- }
499
- .gradio-container .accordion svg {
500
- fill: #24292e !important; /* Arrows */
501
  }
502
 
503
- /* Info Section (The invisible text area) */
504
- .info-section { padding: 16px; background: white !important; }
505
-
506
- /* Links should remain Blue */
507
- .gradio-container a, .gradio-container .prose a {
508
- color: #0366d6 !important;
509
- text-decoration: none;
510
  }
511
- .gradio-container a:hover { text-decoration: underline; }
512
-
513
- /* Checkbox Accent Color */
514
- .gradio-container input[type="checkbox"] { accent-color: #0366d6 !important; }
515
 
516
- /* ============================================================ */
517
- /* TABLE STYLING */
518
- /* ============================================================ */
519
-
520
  .table-container {
521
- overflow-x: auto; overflow-y: auto; max-height: 75vh;
522
- border: 2px solid #e1e4e8; border-radius: 6px;
523
- background: white !important; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
 
 
524
  }
525
-
526
- .gradio-container table.metrics-table {
527
- border-collapse: collapse; width: 100%; background: white !important;
528
  }
529
-
530
- .gradio-container table.metrics-table th,
531
- .gradio-container table.metrics-table td {
532
- padding: 10px 14px;
533
- border: 1.5px solid #e1e4e8;
534
- white-space: nowrap;
535
- font-size: 13px;
536
- text-align: left;
537
- background-color: white !important;
538
- color: #24292e !important;
539
  }
540
-
541
- .gradio-container table.metrics-table th {
542
- background: #f6f8fa !important; /* Light grey header */
543
- font-weight: 600; position: sticky; top: 0; z-index: 10;
544
- border-bottom: 2px solid #d1d5da;
545
  }
546
-
547
- /* Row Number Column */
548
  .metrics-table th:first-child, .metrics-table td:first-child {
549
- width: 60px !important; text-align: center !important;
550
- background-color: #f0f0f0 !important;
551
  }
552
 
553
- /* Plot Container */
554
- .gradio-container .plot-container { width: 100% !important; background: white !important; }
 
 
 
 
555
  """
556
 
557
  with gr.Blocks(title="MoE-CAP Dashboard", css=row_css, theme=gr.themes.Default()) as demo:
558
  gr.Markdown("# MoE-CAP Dashboard")
559
 
560
  with gr.Row():
561
- # Left side - Filters (narrower)
562
  with gr.Column(scale=2):
563
  with gr.Group(elem_classes="search-box"):
564
- search_input = gr.Textbox(
565
- label="🔍 Search",
566
- placeholder="Search across all columns...",
567
- lines=1
568
- )
569
 
570
  with gr.Group(elem_classes="filter-section"):
571
  gr.Markdown("### 🎛️ Filters")
572
-
573
  dir_path = gr.State(RESULT_DIR)
574
 
575
  task_filter = gr.CheckboxGroup(
576
  label="📊 Tasks",
577
- choices=[
578
- ("GSM8K", "gsm8k"),
579
- ("LongBench", "longbench"),
580
- ("MMLU", "mmlu"),
581
- ("NuminaMath", "numinamath"),
582
- ("RULER", "ruler")
583
- ],
584
  value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
585
  )
586
-
587
- framework_filter = gr.CheckboxGroup(
588
- label="⚙️ Inference Frameworks",
589
- choices=["sglang", "vllm"],
590
- value=["sglang", "vllm"],
591
- )
592
-
593
- model_type_filter = gr.CheckboxGroup(
594
- label="🤖 Model Types",
595
- choices=["instruct", "thinking"],
596
- value=["instruct", "thinking"],
597
- )
598
-
599
- precision_filter = gr.CheckboxGroup(
600
- label="🎯 Precision",
601
- choices=["bfloat16", "fp8"],
602
- value=["bfloat16", "fp8"],
603
- )
604
 
605
  with gr.Accordion("📖 About Tasks & Metrics", open=True):
606
  gr.Markdown(
607
- "### Tasks\n"
608
- "- **GSM8K** — Mathematics Problem-Solving ([paper](https://arxiv.org/abs/2110-14168))\n"
609
- "- **LongBench** — Long-Context Understanding ([paper](https://arxiv.org/abs/2412.15204))\n"
610
- "- **MMLU** — Multitask Language Understanding ([paper](https://arxiv.org/abs/2009.03300))\n"
611
- "- **NuminaMath** — Mathematical Reasoning ([paper](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf))\n"
612
- "- **RULER** — Extreme Long-Context Eval ([paper](https://arxiv.org/abs/2404.06654))\n\n"
613
-
614
- "### Metrics\n"
615
- "- **E2E(s)** — End-to-End Latency\n"
616
- "- **Accuracy(%)** — Task Accuracy\n"
617
- "- **Cost($)** — Inference Cost\n"
618
- "- **Decoding/Prefill T/s** — Throughput\n"
619
- "- **S-MBU/MFU(%)** — Hardware Utilization\n"
620
- "- **TTFT(s)** — Time To First Token\n"
621
- "- **TPOT(s)** — Time Per Output Token",
622
  elem_classes="info-section"
623
  )
624
 
625
- # Right side - Table with selection and Radar Plot below
626
  with gr.Column(scale=5):
627
  leaderboard_output = gr.HTML(label="📈 Results")
628
 
629
  with gr.Group(elem_classes="filter-section"):
630
  gr.Markdown("### 📊 CAP Radar Plot")
631
- gr.Markdown(
632
- "**How to use:** Look at the 'Row #' column in the table above. "
633
- "Enter up to 3 row numbers below (separated by commas) and click Generate."
634
- )
635
 
636
  with gr.Row():
637
- row_indices_input = gr.Textbox(
638
- label="Row Numbers to Compare",
639
- placeholder="Example: 0,1,2",
640
- elem_id="row_indices_input",
641
- scale=3
642
- )
643
- generate_btn = gr.Button("🎯 Generate", variant="primary", scale=1, size="lg")
644
 
645
- # Modified Layout: Removed surrounding columns to allow plot to fill full width
646
- radar_plot = gr.Plot(
647
- label="",
648
- value=generate_radar_plot([]),
649
- elem_classes="plot-container"
650
- )
651
 
 
652
  df_data_state = gr.State([])
 
653
 
654
- demo.load(
655
- fn=auto_refresh_from_dir,
656
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
657
- outputs=[leaderboard_output, df_data_state],
658
- )
659
-
660
- search_input.change(
661
- fn=load_from_dir,
662
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
663
- outputs=[leaderboard_output, df_data_state],
664
- )
665
 
666
- task_filter.change(
667
- fn=load_from_dir,
668
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
669
- outputs=[leaderboard_output, df_data_state],
670
- )
671
- framework_filter.change(
672
- fn=load_from_dir,
673
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
674
- outputs=[leaderboard_output, df_data_state],
675
- )
676
- model_type_filter.change(
677
- fn=load_from_dir,
678
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
679
- outputs=[leaderboard_output, df_data_state],
680
- )
681
- precision_filter.change(
682
- fn=load_from_dir,
683
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
684
- outputs=[leaderboard_output, df_data_state],
685
- )
686
-
687
- # Generate plot on button click
688
- generate_btn.click(
689
- fn=parse_and_generate_plot,
690
- inputs=[df_data_state, row_indices_input],
691
- outputs=[radar_plot]
692
- )
693
 
694
- timer = gr.Timer(60.0)
695
- timer.tick(
696
- fn=auto_refresh_from_dir,
697
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
698
- outputs=[leaderboard_output, df_data_state],
699
- )
700
 
701
  return demo
702
-
703
  if __name__ == "__main__":
704
  app = build_app()
705
  app.launch()
 
7
 
8
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
9
  if not RESULT_DIR:
10
+ # For testing purposes, you can uncomment the line below:
11
  # RESULT_DIR = "generic_result_dir"
12
  raise RuntimeError(
13
  "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
 
33
  return baseline + (val - vmin) / (vmax - vmin) * (100 - baseline)
34
 
35
 
 
 
 
 
 
 
 
36
  def normalize_cost(val, max_tick, baseline=20):
37
  """Normalize cost (lower is better)."""
38
  if max_tick == 0:
 
43
  def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
44
  """Generate a CAP radar plot from selected rows."""
45
 
 
46
  layout_settings = dict(
47
+ height=750,
48
+ autosize=True,
49
+ margin=dict(t=80, b=100, l=80, r=80),
50
  paper_bgcolor='white',
51
  plot_bgcolor='white',
52
  )
53
 
 
54
  if not selected_rows_data or len(selected_rows_data) == 0:
55
  fig = go.Figure()
56
  fig.add_annotation(
57
  text="Please select 1-3 rows from the table to generate radar plot",
58
+ xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
59
+ font=dict(size=16, color="black"), # Ensure text is black
60
+ xanchor='center', yanchor='middle'
 
 
 
 
 
 
 
61
  )
62
+ fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False), **layout_settings)
63
  return fig
64
 
65
  if len(selected_rows_data) > 3:
66
  fig = go.Figure()
67
  fig.add_annotation(
68
  text="Error: Please select no more than 3 rows!",
69
+ xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
 
70
  font=dict(size=18, color="red"),
71
+ xanchor='center', yanchor='middle'
 
 
 
 
 
 
72
  )
73
+ fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False), **layout_settings)
74
  return fig
75
 
76
  datasets = [row.get('Dataset', '') for row in selected_rows_data]
 
79
  fig = go.Figure()
80
  fig.add_annotation(
81
  text="Error: Please select rows from the same dataset!",
82
+ xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
 
83
  font=dict(size=18, color="red"),
84
+ xanchor='center', yanchor='middle'
 
 
 
 
 
 
85
  )
86
+ fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False), **layout_settings)
87
  return fig
88
 
89
  dataset_name = datasets[0] if datasets else "Unknown"
90
 
 
91
  data = {}
92
  for row in selected_rows_data:
 
93
  model_name = row.get('Model', 'Unknown')
94
  if isinstance(model_name, str) and 'href' in model_name:
95
  try:
 
97
  except:
98
  pass
99
 
 
100
  method = row.get('Method', '')
101
  if isinstance(model_name, str) and '/' in model_name:
102
+ legend_name = model_name.split('/')[-1]
103
  else:
104
  legend_name = str(model_name)
105
 
 
106
  if method and method not in ['Unknown', '-', '']:
107
  legend_name = f"{legend_name}-{method}"
108
 
 
109
  acc = row.get('Accuracy(%)', 0)
110
  cost = row.get('Cost($)', 0)
111
  throughput = row.get('Decoding T/s', 0)
112
 
 
113
  try:
114
  acc = float(acc) if acc not in [None, '-', ''] else 0
115
  cost = float(cost) if cost not in [None, '-', ''] else 0
 
118
  acc, cost, throughput = 0, 0, 0
119
 
120
  data[legend_name] = {
121
+ 'accuracy': acc / 100.0 if acc > 1 else acc,
122
  'cost': cost,
123
  'throughput': throughput
124
  }
125
 
 
126
  throughputs = [v['throughput'] for v in data.values()]
127
  costs = [v['cost'] for v in data.values()]
128
  accs = [v['accuracy'] for v in data.values()]
 
143
  normalize_cost(values['cost'], cost_max, baseline),
144
  normalize(values['accuracy'], acc_min, acc_max, baseline)
145
  ]
146
+ norm_vals += [norm_vals[0]]
147
 
148
  hovertext = [
149
  f"Throughput: {raw_vals[0]:.2f} T/s",
 
163
  ))
164
 
165
  fig.update_layout(
166
+ title=dict(text=f"CAP Radar Plot: {dataset_name}", x=0.5, xanchor='center', font=dict(size=20, color="black")),
 
 
 
 
 
167
  polar=dict(
168
+ radialaxis=dict(visible=True, range=[0, 100], tickfont=dict(size=12, color="black")),
169
+ angularaxis=dict(tickfont=dict(size=14, color="black"), rotation=90, direction='clockwise'),
170
+ bgcolor="white"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  ),
172
+ legend=dict(orientation='h', yanchor='bottom', y=-0.15, xanchor='center', x=0.5, font=dict(size=13, color="black")),
173
  **layout_settings
174
  )
175
 
 
182
  model_name = "unknown-model"
183
 
184
  dataset = metrics.get("dataset", "Unknown")
 
185
  method = metrics.get("method", "Unknown")
186
  precision = metrics.get("precision", "Unknown")
187
  model_type = metrics.get("model_type", "Unknown")
 
188
  e2e_s = metrics.get("e2e_s", None)
189
  batch_size = metrics.get("batch_size", None)
190
  gpu_type = metrics.get("gpu_type", "")
 
203
 
204
  if isinstance(model_name, str) and "/" in model_name:
205
  hf_url = f"https://huggingface.co/{model_name}"
206
+ model_cell = f"<a href='{hf_url}' target='_blank' style='color: #0366d6; text-decoration: none;'>{model_name}</a>"
207
  else:
208
  model_cell = model_name
209
 
 
230
  return row
231
 
232
 
233
+ def load_from_dir(dir_path: str, selected_tasks=None, selected_frameworks=None, selected_model_types=None, selected_precisions=None, search_keyword="", force_refresh=False):
 
 
 
 
 
 
 
 
234
  try:
235
  pattern = f"hf://datasets/{dir_path}/**/*.json"
236
  dl_mode = "force_redownload" if force_refresh else None
 
237
  print(f"Fetching from {pattern} (mode={dl_mode})...")
238
+ ds = load_dataset("json", data_files={"train": pattern}, split="train", download_mode=dl_mode)
239
+ except Exception:
240
+ return "<p style='color:black'>No files loaded or Dataset not found.</p>", []
 
 
 
 
 
 
241
 
242
  rows = []
243
  for i, example in enumerate(ds):
244
+ metrics = example.get("metrics") or example.get("json") or example
 
 
 
245
  rows.append(json_to_row(f"{dir_path}#{i}", metrics))
246
 
247
  if not rows:
248
+ return "<p style='color:black'>No records found.</p>", []
 
249
 
250
  df = pd.DataFrame(rows)
251
 
252
+ if selected_tasks:
253
+ df = df[df["Dataset"].astype(str).str.lower().isin([x.lower() for x in selected_tasks])]
254
+ if selected_frameworks:
255
+ df = df[df["Method"].astype(str).str.lower().isin([str(x).lower() for x in selected_frameworks])]
256
+ if selected_model_types:
257
+ df = df[df["Model type"].astype(str).str.lower().isin([str(x).lower() for x in selected_model_types])]
258
+ if selected_precisions:
259
+ df = df[df["Precision"].astype(str).str.lower().isin([str(x).lower() for x in selected_precisions])]
 
 
 
 
 
 
 
 
 
260
  if search_keyword and search_keyword.strip():
261
+ df = df[df.astype(str).apply(lambda row: row.str.lower().str.contains(search_keyword.strip().lower()).any(), axis=1)]
 
 
262
 
263
  if df.empty:
264
+ return "<p style='color:black'>No records found.</p>", []
 
265
 
266
  df = df.fillna("-")
 
 
267
  df.insert(0, 'Row #', range(len(df)))
268
 
 
269
  table_html = f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
270
  df_without_rownum = df.drop('Row #', axis=1)
271
+ return table_html, df_without_rownum.to_dict('records')
272
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
+ def auto_refresh_from_dir(dir_path, tasks, frameworks, types, precisions, search):
275
+ return load_from_dir(dir_path, tasks, frameworks, types, precisions, search, force_refresh=True)
276
 
277
+ def parse_and_generate_plot(df_data, indices_str):
 
278
  if not indices_str or not indices_str.strip():
279
  return generate_radar_plot([])
 
280
  try:
281
+ indices = [int(idx.strip()) for idx in indices_str.split(',') if idx.strip()][:3]
 
 
 
 
282
  selected_rows = [df_data[i] for i in indices if 0 <= i < len(df_data)]
283
  return generate_radar_plot(selected_rows)
284
+ except:
285
  return generate_radar_plot([])
286
 
287
 
 
 
288
  def build_app() -> gr.Blocks:
289
+ # NUCLEAR CSS FIX: Overwrite all generic Gradio variables to force light mode
290
  row_css = """
291
+ /* 1. FORCE LIGHT VARIABLES GLOBALLY */
292
+ :root, .gradio-container, body {
293
+ --body-background-fill: #f5f7fa !important;
294
+ --body-text-color: #374151 !important;
295
+ --background-fill-primary: #ffffff !important;
296
+ --background-fill-secondary: #f3f4f6 !important;
297
+ --border-color-primary: #e5e7eb !important;
298
+ --block-background-fill: #ffffff !important;
299
+ --block-label-text-color: #374151 !important;
300
+ --block-title-text-color: #1f2937 !important;
301
+ --input-background-fill: #ffffff !important;
302
+ --color-accent: #0366d6 !important;
303
+
304
+ /* Reset dark mode specific variables to light values */
305
+ --neutral-50: #f9fafb; --neutral-100: #f3f4f6; --neutral-200: #e5e7eb;
306
+ --neutral-300: #d1d5da; --neutral-400: #9ca3af; --neutral-500: #6b7280;
307
+ --neutral-600: #4b5563; --neutral-700: #374151; --neutral-800: #1f2937;
 
 
308
  }
309
 
310
+ /* 2. RESET STANDARD CONTAINERS */
311
  .gradio-container .block,
312
+ .gradio-container .panel,
313
+ .gradio-container .form {
314
+ background-color: white !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  border-color: #e1e4e8 !important;
316
  }
317
 
318
+ /* 3. SPECIFIC FIX FOR THE DARK "FILTERS" and "RADAR" SECTIONS */
319
+ /* This targets the class you added in python: elem_classes="filter-section" */
320
+ .filter-section {
321
+ background-color: #ffffff !important;
322
+ border: 2px solid #e1e4e8 !important;
323
+ border-radius: 8px !important;
324
+ padding: 16px !important;
325
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05) !important;
 
 
 
326
  }
327
 
328
+ /* Ensure NO child elements inside filter-section have dark backgrounds */
329
+ .filter-section * {
330
+ background-color: transparent !important;
331
  color: #24292e !important;
332
  }
333
 
334
+ /* Re-apply white background to inputs specifically */
335
+ .filter-section input,
336
+ .filter-section textarea,
337
+ .filter-section select {
338
+ background-color: #ffffff !important;
339
+ border: 1px solid #d1d5da !important;
340
+ color: #24292e !important;
 
 
341
  }
342
 
343
+ /* Fix Checkboxes: Ensure the box itself is visible */
344
+ .filter-section input[type="checkbox"] {
345
+ background-color: #ffffff !important;
346
+ border: 1px solid #d1d5da !important;
347
+ accent-color: #0366d6 !important;
 
348
  }
349
 
350
+ /* Fix "How to use" Text (Markdown/Prose) */
351
+ .filter-section .prose,
352
+ .filter-section .prose p,
353
+ .filter-section .prose strong {
 
 
 
 
 
 
 
354
  color: #24292e !important;
355
+ opacity: 1 !important;
 
 
356
  }
357
 
358
+ /* 4. SEARCH BOX */
359
+ .search-box {
360
+ background: white !important;
361
+ padding: 16px !important;
362
+ border-radius: 6px;
363
+ border: 2px solid #e1e4e8 !important;
364
+ margin-bottom: 16px;
365
  }
 
 
 
 
366
 
367
+ /* 5. TABLE STYLING */
 
 
 
368
  .table-container {
369
+ overflow-x: auto;
370
+ max-height: 75vh;
371
+ border: 2px solid #e1e4e8;
372
+ border-radius: 6px;
373
+ background: white !important;
374
  }
375
+ table.metrics-table {
376
+ width: 100%; border-collapse: collapse; background: white !important;
 
377
  }
378
+ table.metrics-table th, table.metrics-table td {
379
+ padding: 10px 14px; border: 1px solid #e1e4e8;
380
+ white-space: nowrap; font-size: 13px; color: #24292e !important;
 
 
 
 
 
 
 
381
  }
382
+ table.metrics-table th {
383
+ background: #f6f8fa !important; font-weight: 600; position: sticky; top: 0;
 
 
 
384
  }
 
 
385
  .metrics-table th:first-child, .metrics-table td:first-child {
386
+ background-color: #f0f0f0 !important; text-align: center;
 
387
  }
388
 
389
+ /* 6. PLOT CONTAINER */
390
+ .plot-container { width: 100% !important; background: white !important; }
391
+
392
+ /* 7. LINKS */
393
+ a { color: #0366d6 !important; text-decoration: none; }
394
+ a:hover { text-decoration: underline; }
395
  """
396
 
397
  with gr.Blocks(title="MoE-CAP Dashboard", css=row_css, theme=gr.themes.Default()) as demo:
398
  gr.Markdown("# MoE-CAP Dashboard")
399
 
400
  with gr.Row():
401
+ # Left Sidebar
402
  with gr.Column(scale=2):
403
  with gr.Group(elem_classes="search-box"):
404
+ search_input = gr.Textbox(label="🔍 Search", placeholder="Search...", lines=1)
 
 
 
 
405
 
406
  with gr.Group(elem_classes="filter-section"):
407
  gr.Markdown("### 🎛️ Filters")
 
408
  dir_path = gr.State(RESULT_DIR)
409
 
410
  task_filter = gr.CheckboxGroup(
411
  label="📊 Tasks",
412
+ choices=[("GSM8K", "gsm8k"), ("LongBench", "longbench"), ("MMLU", "mmlu"), ("NuminaMath", "numinamath"), ("RULER", "ruler")],
 
 
 
 
 
 
413
  value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
414
  )
415
+ framework_filter = gr.CheckboxGroup(label="⚙️ Frameworks", choices=["sglang", "vllm"], value=["sglang", "vllm"])
416
+ model_type_filter = gr.CheckboxGroup(label="🤖 Model Types", choices=["instruct", "thinking"], value=["instruct", "thinking"])
417
+ precision_filter = gr.CheckboxGroup(label="🎯 Precision", choices=["bfloat16", "fp8"], value=["bfloat16", "fp8"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
  with gr.Accordion("📖 About Tasks & Metrics", open=True):
420
  gr.Markdown(
421
+ "### Tasks\n- **GSM8K**, **LongBench**, **MMLU**, **NuminaMath**, **RULER**\n\n"
422
+ "### Metrics\n- **E2E(s)**: Latency | **Cost($)** | **T/s**: Throughput | **S-MBU/MFU**: Utilization",
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  elem_classes="info-section"
424
  )
425
 
426
+ # Right Main Content
427
  with gr.Column(scale=5):
428
  leaderboard_output = gr.HTML(label="📈 Results")
429
 
430
  with gr.Group(elem_classes="filter-section"):
431
  gr.Markdown("### 📊 CAP Radar Plot")
432
+ gr.Markdown("**How to use:** Look at the 'Row #' column in the table. Enter row numbers (e.g., 0,1,2) and click Generate.")
 
 
 
433
 
434
  with gr.Row():
435
+ row_indices_input = gr.Textbox(label="Row Numbers", placeholder="0,1,2", scale=3)
436
+ generate_btn = gr.Button("🎯 Generate", variant="primary", scale=1)
 
 
 
 
 
437
 
438
+ radar_plot = gr.Plot(value=generate_radar_plot([]), elem_classes="plot-container")
 
 
 
 
 
439
 
440
+ # State & Events
441
  df_data_state = gr.State([])
442
+ inputs = [dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input]
443
 
444
+ demo.load(fn=auto_refresh_from_dir, inputs=inputs, outputs=[leaderboard_output, df_data_state])
445
+ search_input.change(fn=load_from_dir, inputs=inputs, outputs=[leaderboard_output, df_data_state])
446
+ task_filter.change(fn=load_from_dir, inputs=inputs, outputs=[leaderboard_output, df_data_state])
447
+ framework_filter.change(fn=load_from_dir, inputs=inputs, outputs=[leaderboard_output, df_data_state])
448
+ model_type_filter.change(fn=load_from_dir, inputs=inputs, outputs=[leaderboard_output, df_data_state])
449
+ precision_filter.change(fn=load_from_dir, inputs=inputs, outputs=[leaderboard_output, df_data_state])
 
 
 
 
 
450
 
451
+ generate_btn.click(fn=parse_and_generate_plot, inputs=[df_data_state, row_indices_input], outputs=[radar_plot])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
+ gr.Timer(60.0).tick(fn=auto_refresh_from_dir, inputs=inputs, outputs=[leaderboard_output, df_data_state])
 
 
 
 
 
454
 
455
  return demo
456
+
457
  # Script entry point: when this file is executed directly (not imported),
  # build the dashboard via build_app() and call launch() on the result.
  if __name__ == "__main__":
458
  app = build_app()
459
  app.launch()