arnauad3 committed
Commit
7dd3ffd
1 Parent(s): 091340b

Other Models Leaderboard

Files changed (5)
  1. app.py +122 -65
  2. config/constants.py +11 -0
  3. data_processing.py +13 -6
  4. handlers/leaderboard_handlers.py +4 -2
  5. utils.py +32 -4
app.py CHANGED
@@ -19,6 +19,78 @@ from static.html_content import (
 from style.css_html_js import custom_css
 
 
+def make_leaderboard_tab(state: Simulator, name: str):
+    """Create a leaderboard tab with the given name and state."""
+    with gr.Tab(name):
+        # 1st row filters (select task, benchmark and sim)
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=4):
+                task_radio = gr.Radio(choices=C.TASKS, label="Select Task", value=C.DEFAULT_TASK)
+            with gr.Column(scale=3):
+                benchmark_radio = gr.Radio(
+                    choices=[C.DEFAULT_BENCHMARK] + C.S2R_BENCHMARKS,
+                    label="Select Benchmark",
+                    value=C.DEFAULT_BENCHMARK,
+                )
+            with gr.Column(scale=2, min_width=180):
+                simulator_radio = gr.Radio(
+                    choices=C.SIMULATORS,
+                    value=C.SIMULATORS[0],
+                    label="Select Simulator",
+                    scale=1,
+                )
+
+        # 2nd row filters (search, model type, params)
+        with gr.Row(equal_height=True):
+            search_box = gr.Textbox(
+                label="Search Model",
+                placeholder="Type model name...",
+                scale=2,
+            )
+            model_type_dropdown = gr.Radio(
+                choices=C.MODEL_TYPES,
+                label="Select Model Type",
+                value=C.DEFAULT_MODEL_TYPE,
+                scale=3,
+            )
+            params_slider = gr.Slider(
+                minimum=state.get_current_df()["Params"].min(),
+                maximum=C.DEFAULT_MAX_PARAMS,
+                value=C.DEFAULT_MAX_PARAMS,
+                label="Max Params",
+                step=1,
+                scale=2,
+            )
+
+        if name == "Other Models":
+            show = False
+        else:
+            show = True
+        # main leaderboard content
+        leaderboard = gr.DataFrame(
+            value=filter_leaderboard(C.DEFAULT_TASK, C.DEFAULT_BENCHMARK, C.DEFAULT_MODEL_TYPE, "", C.DEFAULT_MAX_PARAMS, state, name),
+            headers="first row",
+            show_row_numbers=show,
+            wrap=True,
+            datatype=["html", "html"],
+            interactive=False,
+            column_widths=["7%", "28%", "13%", "10%", "13%", "10%", "14%"],
+            elem_classes="dataframe-leaderboard",
+        )
+
+        # caption for the Base vs Instruct models
+        gr.HTML(LC_FOOTNOTE_HTML)
+
+    return (
+        task_radio,
+        benchmark_radio,
+        simulator_radio,
+        search_box,
+        model_type_dropdown,
+        params_slider,
+        leaderboard,
+    )
+
 with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
     # Load csv results
     df_icarus = read_dataframe(C.ICARUS_RESULTS)
@@ -44,66 +116,33 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=colors.emeral
     gr.HTML(NAV_BUTTONS_HTML)
     gr.HTML(INTRO_HTML)
 
+
+
     # Main view
     with gr.Tabs() as tabs:
         # Leaderboard
-        with gr.Tab("Leaderboard"):
-            # 1st row filters (select task, benchmark and sim)
-            with gr.Row(equal_height=True):
-                with gr.Column(scale=4):
-                    task_radio = gr.Radio(choices=C.TASKS, label="Select Task", value=C.DEFAULT_TASK)
-                with gr.Column(scale=3):
-                    benchmark_radio = gr.Radio(
-                        choices=[C.DEFAULT_BENCHMARK] + C.S2R_BENCHMARKS,
-                        label="Select Benchmark",
-                        value=C.DEFAULT_BENCHMARK,
-                    )
-                with gr.Column(scale=2, min_width=180):
-                    simulator_radio = gr.Radio(
-                        choices=C.SIMULATORS,
-                        value=C.SIMULATORS[0],
-                        label="Select Simulator",
-                        scale=1,
-                    )
-
-            # 2nd row filters (search, model type, params)
-            with gr.Row(equal_height=True):
-                search_box = gr.Textbox(
-                    label="Search Model",
-                    placeholder="Type model name...",
-                    scale=2,
-                )
-                model_type_dropdown = gr.Radio(
-                    choices=C.MODEL_TYPES,
-                    label="Select Model Type",
-                    value=C.DEFAULT_MODEL_TYPE,
-                    scale=3,
-                )
-                params_slider = gr.Slider(
-                    minimum=state.get_current_df()["Params"].min(),
-                    maximum=C.DEFAULT_MAX_PARAMS,
-                    value=C.DEFAULT_MAX_PARAMS,
-                    label="Max Params",
-                    step=1,
-                    scale=2,
-                )
-
-            # main leaderboard content
-            leaderboard = gr.DataFrame(
-                value=filter_leaderboard(
-                    C.DEFAULT_TASK, C.DEFAULT_BENCHMARK, C.DEFAULT_MODEL_TYPE, "", C.DEFAULT_MAX_PARAMS, state
-                ),
-                headers="first row",
-                show_row_numbers=True,
-                wrap=True,
-                datatype=["html", "html"],
-                interactive=False,
-                column_widths=["7%", "28%", "13%", "10%", "13%", "10%", "14%"],
-                elem_classes="dataframe-leaderboard",
-            )
-
-            # caption for the Base vs Instruct models
-            gr.HTML(LC_FOOTNOTE_HTML)
+        name_main = "Lastest Leaderboard"
+        (
+            task_radio_main,
+            benchmark_radio_main,
+            simulator_radio_main,
+            search_box_main,
+            model_type_dropdown_main,
+            params_slider_main,
+            leaderboard_main,
+        ) = make_leaderboard_tab(state, name_main)
+
+        # Other models
+        name_other = "Other Models"
+        (
+            task_radio_other,
+            benchmark_radio_other,
+            simulator_radio_other,
+            search_box_other,
+            model_type_dropdown_other,
+            params_slider_other,
+            leaderboard_other,
+        ) = make_leaderboard_tab(state, name_other)
 
         # all plots using Plotly
         with gr.Tab("Plot View"):
@@ -161,17 +200,35 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=colors.emeral
     create_leaderboard_handlers(
         filter_leaderboard_fn=filter_leaderboard,
         generate_scatter_plot_fn=generate_scatter_plot,
-        task_radio=task_radio,
-        benchmark_radio=benchmark_radio,
-        model_type_dropdown=model_type_dropdown,
-        search_box=search_box,
-        params_slider=params_slider,
+        task_radio=task_radio_main,
+        benchmark_radio=benchmark_radio_main,
+        model_type_dropdown=model_type_dropdown_main,
+        search_box=search_box_main,
+        params_slider=params_slider_main,
         bubble_benchmark=bubble_benchmark,
         bubble_metric=bubble_metric,
         scatter_plot=scatter_plot,
-        leaderboard=leaderboard,
-        simulator_radio=simulator_radio,
+        leaderboard=leaderboard_main,
+        simulator_radio=simulator_radio_main,
         state=state,
+        name=name_main,
+    )
+
+    create_leaderboard_handlers(
+        filter_leaderboard_fn=filter_leaderboard,
+        generate_scatter_plot_fn=generate_scatter_plot,
+        task_radio=task_radio_other,
+        benchmark_radio=benchmark_radio_other,
+        model_type_dropdown=model_type_dropdown_other,
+        search_box=search_box_other,
+        params_slider=params_slider_other,
+        bubble_benchmark=bubble_benchmark,
+        bubble_metric=bubble_metric,
+        scatter_plot=scatter_plot,
+        leaderboard=leaderboard_other,
+        simulator_radio=simulator_radio_other,
+        state=state,
+        name=name_other,
     )
 
 
@@ -181,4 +238,4 @@ app.launch(
         "hpai_logo_grad.png",
        "bsc-logo.png",
     ]
-)
+)
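The app.py change boils down to a tab-factory pattern: one function builds a tab's components and returns them, and the caller invokes it once per tab, wiring each returned set to the shared handlers. A minimal, runnable sketch of that pattern (the component names and the fake_filter handler below are illustrative placeholders, not the leaderboard's real filter_leaderboard or handler wiring):

import gradio as gr

def make_tab(name: str):
    """Build one tab and hand back the components the caller needs to wire up."""
    with gr.Tab(name):
        query = gr.Textbox(label="Search")
        table = gr.DataFrame(value=[[name, 0]], headers=["Model", "Score"], interactive=False)
    return query, table

def fake_filter(q):
    # stand-in for a real filtering function keyed on the tab name
    return [[q or "(empty)", 0]]

with gr.Blocks() as demo:
    with gr.Tabs():
        query_main, table_main = make_tab("Latest Leaderboard")
        query_other, table_other = make_tab("Other Models")
    # wire each tab's own components; the handler function is shared
    query_main.change(fake_filter, inputs=query_main, outputs=table_main)
    query_other.change(fake_filter, inputs=query_other, outputs=table_other)

if __name__ == "__main__":
    demo.launch()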
config/constants.py CHANGED
@@ -4,6 +4,17 @@ VERILATOR_RESULTS = f"{RESULTS_DIR}/results_verilator.json"
 ICARUS_AGG = f"{RESULTS_DIR}/aggregated_scores_icarus.csv"
 VERILATOR_AGG = f"{RESULTS_DIR}/aggregated_scores_verilator.csv"
 
+
+DISCARDED_MODELS = {
+
+}
+
+"""
+"DeepSeek R1": "10/10/2025",
+"QwenCoder 2.5 7B": "11/10/2025",
+"RTLCoder Mistral": "14/10/2025"
+"""
+
 TASKS = ["Spec-to-RTL", "Code Completion", "Line Completion †"]
 S2R_BENCHMARKS = ["VerilogEval S2R", "RTLLM"]
 CC_BENCHMARKS = ["VerilogEval MC", "VeriGen"]
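DISCARDED_MODELS is committed empty, with candidate entries parked in the string literal below it. Judging from how the rest of the diff consumes it (.isin(DISCARDED_MODELS) for membership, DISCARDED_MODELS.get(name, "N/A") for the "Date Discarded" column), the intended shape is display name → discard date. A sketch of a populated version, reusing the commented-out entries; this is not what the commit ships:

# Hypothetical populated form; the commit itself ships an empty dict.
DISCARDED_MODELS = {
    "DeepSeek R1": "10/10/2025",
    "QwenCoder 2.5 7B": "11/10/2025",
    "RTLCoder Mistral": "14/10/2025",
}

# Membership test (against the keys) and date lookup, as used in data_processing.py / utils.py:
assert "DeepSeek R1" in DISCARDED_MODELS
print(DISCARDED_MODELS.get("RTLCoder Mistral", "N/A"))  # -> 14/10/2025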
data_processing.py CHANGED
@@ -10,6 +10,7 @@ from config.constants import (
     SCATTER_PLOT_X_TICKS,
     TYPE_COLORS,
     Y_AXIS_LIMITS,
+    DISCARDED_MODELS,
 )
 from utils import filter_bench, filter_bench_all, filter_RTLRepo, handle_special_cases
 
@@ -40,7 +41,7 @@ class Simulator:
 
 
 # filtering main function for the leaderboard body
-def filter_leaderboard(task, benchmark, model_type, search_query, max_params, state):
+def filter_leaderboard(task, benchmark, model_type, search_query, max_params, state, name):
     """Filter leaderboard data based on user selections."""
     subset = state.get_current_df().copy()
 
@@ -69,15 +70,20 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, st
         max_params = float(max_params)
     subset = subset[subset["Params"] <= max_params]
 
+    if name == "Other Models":
+        subset = subset[subset["Model"].isin(DISCARDED_MODELS)]
+    else:
+        subset = subset[~subset["Model"].isin(DISCARDED_MODELS)]
+
     if benchmark == "All":
         if task == "Spec-to-RTL":
-            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg S2R")
+            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg S2R", name=name)
         elif task == "Code Completion":
-            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg MC")
+            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg MC", name=name)
         elif task == "Line Completion †":
-            return filter_RTLRepo(subset)
+            return filter_RTLRepo(subset, name=name)
     elif benchmark == "RTL-Repo":
-        return filter_RTLRepo(subset)
+        return filter_RTLRepo(subset, name=name)
     else:
         agg_column = None
         if benchmark == "VerilogEval S2R":
@@ -89,7 +95,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, st
         elif benchmark == "VeriGen":
             agg_column = "Agg VeriGen"
 
-        return filter_bench(subset, state.get_current_agg(), agg_column)
+        return filter_bench(subset, state.get_current_agg(), agg_column, name=name)
 
 
 def generate_scatter_plot(benchmark, metric, state):
@@ -97,6 +103,7 @@ def generate_scatter_plot(benchmark, metric, state):
     benchmark, metric = handle_special_cases(benchmark, metric)
 
     subset = state.get_current_df()[state.get_current_df()["Benchmark"] == benchmark]
+    subset = subset[~subset["Model"].isin(DISCARDED_MODELS)]
    if benchmark == "RTL-Repo":
         subset = subset[subset["Metric"].str.contains("EM", case=False, na=False)]
     detailed_scores = subset.groupby("Model", as_index=False)["Score"].mean()
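The new name argument splits every query into two complements of the same data: the "Other Models" tab keeps only rows whose Model is a key of DISCARDED_MODELS, and every other tab keeps the rest. A self-contained sketch of that predicate (the DataFrame and the stand-in DISCARDED_MODELS are toy values, not leaderboard data):

import pandas as pd

# toy stand-in for config.constants.DISCARDED_MODELS
DISCARDED_MODELS = {"RTLCoder Mistral": "14/10/2025"}

df = pd.DataFrame({"Model": ["SomeNewModel", "RTLCoder Mistral"], "Score": [0.61, 0.34]})

def split_by_tab(subset: pd.DataFrame, name: str) -> pd.DataFrame:
    # Series.isin() against a dict tests membership in its keys,
    # which is exactly what filter_leaderboard does with `name`.
    if name == "Other Models":
        return subset[subset["Model"].isin(DISCARDED_MODELS)]
    return subset[~subset["Model"].isin(DISCARDED_MODELS)]

print(split_by_tab(df, "Other Models")["Model"].tolist())         # ['RTLCoder Mistral']
print(split_by_tab(df, "Lastest Leaderboard")["Model"].tolist())  # ['SomeNewModel']

generate_scatter_plot applies only the second branch, so discarded models also drop out of the Plot View.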
handlers/leaderboard_handlers.py CHANGED
@@ -26,6 +26,7 @@ def create_leaderboard_handlers(
     leaderboard,
     simulator_radio,
     state,
+    name,
 ):
     def update_benchmarks_by_task(task):
         if task == "Spec-to-RTL":
@@ -45,6 +46,7 @@ def create_leaderboard_handlers(
             search_box.value,
             params_slider.value,
             state,
+            name,
         )
         return gr.update(value=benchmark_value, choices=new_benchmarks), filtered
 
@@ -77,7 +79,7 @@ def create_leaderboard_handlers(
     ):
         state.set_simulator(simulator)
 
-        leaderboard_df = filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state)
+        leaderboard_df = filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state, name)
         fig = generate_scatter_plot_fn(plot_bench, plot_metric, state)
         return leaderboard_df, fig
 
@@ -88,7 +90,7 @@ def create_leaderboard_handlers(
     )
 
     def filter_with_state(task, benchmark, model_type, search, max_params):
-        return filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state)
+        return filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state, name)
 
     benchmark_radio.change(
         fn=filter_with_state,
utils.py CHANGED
@@ -5,8 +5,9 @@ import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
+import re
 
-from config.constants import COLUMN_MAPPINGS, COLUMN_ORDER, TYPE_EMOJI
+from config.constants import COLUMN_MAPPINGS, COLUMN_ORDER, TYPE_EMOJI, DISCARDED_MODELS
 
 
 def model_hyperlink(link, model_name, release, thinking=False):
@@ -20,6 +21,20 @@ def model_hyperlink(link, model_name, release, thinking=False):
     return ret + reasoning_badge + new_badge if thinking == "Reasoning" else ret + new_badge
 
 
+def extract_name_from_link(html: str) -> str:
+    """
+    Extracts the model name from the HTML generated by model_hyperlink()
+    """
+    if not isinstance(html, str):
+        return html
+
+    match = re.search(r'<a[^>]*>(.*?)</a>', html)
+    if match:
+        return match.group(1).strip()
+
+    return re.sub(r'<[^>]+>', '', html).strip()
+
+
 def handle_special_cases(benchmark, metric):
     if metric == "Exact Matching (EM)":
         benchmark = "RTL-Repo"
@@ -28,7 +43,7 @@ def handle_special_cases(benchmark, metric):
     return benchmark, metric
 
 
-def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
+def filter_RTLRepo(subset: pd.DataFrame, name=str) -> pd.DataFrame:
     if subset.empty:
         return pd.DataFrame(columns=["Type", "Model", "Params", "Exact Matching (EM)"])
 
@@ -42,6 +57,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
         "Model"
     )
     filtered_df = subset[["Model", "Score"]].rename(columns={"Score": "Exact Matching (EM)"})
+
     filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
     filtered_df["Model"] = filtered_df.apply(
         lambda row: model_hyperlink(
@@ -54,10 +70,13 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
     filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, ""))
     filtered_df = filtered_df[["Type", "Model", "Params", "Exact Matching (EM)"]]
     filtered_df = filtered_df.sort_values(by="Exact Matching (EM)", ascending=False).reset_index(drop=True)
+
+    if name == "Other Models":
+        filtered_df["Date Discarded"] = filtered_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A"))
     return filtered_df
 
 
-def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
+def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None, name=str) -> pd.DataFrame:
     if subset.empty:
         return pd.DataFrame(columns=COLUMN_ORDER)
 
@@ -85,6 +104,8 @@ def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataF
     # else: # fallback
     #     pivot_df["Aggregated ⬆️"] = pivot_df.mean(axis=1, numeric_only=True).round(2)
 
+
+
     pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
     pivot_df["Model"] = pivot_df.apply(
         lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"], row["Thinking"]),
@@ -95,12 +116,16 @@ def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataF
     if all(col in pivot_df.columns for col in ["Power", "Performance", "Area"]):
         pivot_df["Post-Synthesis (PSQ)"] = pivot_df[["Power", "Performance", "Area"]].mean(axis=1).round(2)
 
+
     pivot_df.rename(columns=COLUMN_MAPPINGS, inplace=True)
     pivot_df = pivot_df[[col for col in COLUMN_ORDER if col in pivot_df.columns]]
 
     if "Functionality" in pivot_df.columns:
         pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True)
 
+    if name != "Other Models":
+        pivot_df["Date Discarded"] = pivot_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A"))
+
     return pivot_df
 
 
@@ -128,7 +153,7 @@ def custom_agg_cc(vals):
     return round(result, 2)
 
 
-def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
+def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None, name=str) -> pd.DataFrame:
     if subset.empty:
         return pd.DataFrame(columns=COLUMN_ORDER)
 
@@ -164,4 +189,7 @@ def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.D
     if "Functionality" in pivot_df.columns:
         pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True)
 
+    if name == "Other Models":
+        pivot_df["Date Discarded"] = pivot_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A"))
+
     return pivot_df
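Because the leaderboard's Model column holds HTML (produced by model_hyperlink), the new extract_name_from_link helper is what maps a rendered cell back to a plain key for the DISCARDED_MODELS lookup. A small usage sketch; the anchor markup below is illustrative, and the real cells also carry release and reasoning badges after the link:

import re

def extract_name_from_link(html: str) -> str:
    """Pull the anchor text out of a model cell; fall back to stripping all tags."""
    if not isinstance(html, str):
        return html
    match = re.search(r'<a[^>]*>(.*?)</a>', html)
    if match:
        return match.group(1).strip()
    return re.sub(r'<[^>]+>', '', html).strip()

cell = '<a href="https://example.com/rtlcoder">RTLCoder Mistral</a> <span>NEW</span>'
print(extract_name_from_link(cell))          # RTLCoder Mistral
print(extract_name_from_link("plain name"))  # plain name (no markup to strip)

# This is what feeds the "Date Discarded" column:
DISCARDED_MODELS = {"RTLCoder Mistral": "14/10/2025"}  # toy stand-in for config.constants
print(DISCARDED_MODELS.get(extract_name_from_link(cell), "N/A"))  # 14/10/2025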