Other Models Leaderboard

Files changed:
- app.py (+122 -65)
- config/constants.py (+11 -0)
- data_processing.py (+13 -6)
- handlers/leaderboard_handlers.py (+4 -2)
- utils.py (+32 -4)
app.py
CHANGED

@@ -19,6 +19,78 @@ from static.html_content import (
 from style.css_html_js import custom_css
 
 
+def make_leaderboard_tab(state: Simulator, name: str):
+    """Create a leaderboard tab with the given name and state."""
+    with gr.Tab(name):
+        # 1st row filters (select task, benchmark and sim)
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=4):
+                task_radio = gr.Radio(choices=C.TASKS, label="Select Task", value=C.DEFAULT_TASK)
+            with gr.Column(scale=3):
+                benchmark_radio = gr.Radio(
+                    choices=[C.DEFAULT_BENCHMARK] + C.S2R_BENCHMARKS,
+                    label="Select Benchmark",
+                    value=C.DEFAULT_BENCHMARK,
+                )
+            with gr.Column(scale=2, min_width=180):
+                simulator_radio = gr.Radio(
+                    choices=C.SIMULATORS,
+                    value=C.SIMULATORS[0],
+                    label="Select Simulator",
+                    scale=1,
+                )
+
+        # 2nd row filters (search, model type, params)
+        with gr.Row(equal_height=True):
+            search_box = gr.Textbox(
+                label="Search Model",
+                placeholder="Type model name...",
+                scale=2,
+            )
+            model_type_dropdown = gr.Radio(
+                choices=C.MODEL_TYPES,
+                label="Select Model Type",
+                value=C.DEFAULT_MODEL_TYPE,
+                scale=3,
+            )
+            params_slider = gr.Slider(
+                minimum=state.get_current_df()["Params"].min(),
+                maximum=C.DEFAULT_MAX_PARAMS,
+                value=C.DEFAULT_MAX_PARAMS,
+                label="Max Params",
+                step=1,
+                scale=2,
+            )
+
+        if name == "Other Models":
+            show = False
+        else:
+            show = True
+        # main leaderboard content
+        leaderboard = gr.DataFrame(
+            value=filter_leaderboard(C.DEFAULT_TASK, C.DEFAULT_BENCHMARK, C.DEFAULT_MODEL_TYPE, "", C.DEFAULT_MAX_PARAMS, state, name),
+            headers="first row",
+            show_row_numbers=show,
+            wrap=True,
+            datatype=["html", "html"],
+            interactive=False,
+            column_widths=["7%", "28%", "13%", "10%", "13%", "10%", "14%"],
+            elem_classes="dataframe-leaderboard",
+        )
+
+        # caption for the Base vs Instruct models
+        gr.HTML(LC_FOOTNOTE_HTML)
+
+    return (
+        task_radio,
+        benchmark_radio,
+        simulator_radio,
+        search_box,
+        model_type_dropdown,
+        params_slider,
+        leaderboard,
+    )
+
 with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
     # Load csv results
     df_icarus = read_dataframe(C.ICARUS_RESULTS)
@@ -44,66 +116,33 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
     gr.HTML(NAV_BUTTONS_HTML)
     gr.HTML(INTRO_HTML)
 
+
+
     # Main view
     with gr.Tabs() as tabs:
         # Leaderboard
-        with gr.Tab("Leaderboard"):
-            # 1st row filters (select task, benchmark and sim)
-            with gr.Row(equal_height=True):
-                with gr.Column(scale=4):
-                    task_radio = gr.Radio(choices=C.TASKS, label="Select Task", value=C.DEFAULT_TASK)
-                with gr.Column(scale=3):
-                    benchmark_radio = gr.Radio(
-                        choices=[C.DEFAULT_BENCHMARK] + C.S2R_BENCHMARKS,
-                        label="Select Benchmark",
-                        value=C.DEFAULT_BENCHMARK,
-                    )
-                with gr.Column(scale=2, min_width=180):
-                    simulator_radio = gr.Radio(
-                        choices=C.SIMULATORS,
-                        value=C.SIMULATORS[0],
-                        label="Select Simulator",
-                        scale=1,
-                    )
-
-            # 2nd row filters (search, model type, params)
-            with gr.Row(equal_height=True):
-                search_box = gr.Textbox(
-                    label="Search Model",
-                    placeholder="Type model name...",
-                    scale=2,
-                )
-                model_type_dropdown = gr.Radio(
-                    choices=C.MODEL_TYPES,
-                    label="Select Model Type",
-                    value=C.DEFAULT_MODEL_TYPE,
-                    scale=3,
-                )
-                params_slider = gr.Slider(
-                    minimum=state.get_current_df()["Params"].min(),
-                    maximum=C.DEFAULT_MAX_PARAMS,
-                    value=C.DEFAULT_MAX_PARAMS,
-                    label="Max Params",
-                    step=1,
-                    scale=2,
-                )
-
-            # main leaderboard content
-            leaderboard = gr.DataFrame(
-                value=filter_leaderboard(
-                    C.DEFAULT_TASK, C.DEFAULT_BENCHMARK, C.DEFAULT_MODEL_TYPE, "", C.DEFAULT_MAX_PARAMS, state
-                ),
-                headers="first row",
-                show_row_numbers=True,
-                wrap=True,
-                datatype=["html", "html"],
-                interactive=False,
-                column_widths=["7%", "28%", "13%", "10%", "13%", "10%", "14%"],
-                elem_classes="dataframe-leaderboard",
-            )
-
-            # caption for the Base vs Instruct models
-            gr.HTML(LC_FOOTNOTE_HTML)
+        name_main = "Latest Leaderboard"
+        (
+            task_radio_main,
+            benchmark_radio_main,
+            simulator_radio_main,
+            search_box_main,
+            model_type_dropdown_main,
+            params_slider_main,
+            leaderboard_main,
+        ) = make_leaderboard_tab(state, name_main)
+
+        # Other models
+        name_other = "Other Models"
+        (
+            task_radio_other,
+            benchmark_radio_other,
+            simulator_radio_other,
+            search_box_other,
+            model_type_dropdown_other,
+            params_slider_other,
+            leaderboard_other,
+        ) = make_leaderboard_tab(state, name_other)
 
     # all plots using Plotly
     with gr.Tab("Plot View"):
@@ -161,17 +200,35 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
     create_leaderboard_handlers(
         filter_leaderboard_fn=filter_leaderboard,
         generate_scatter_plot_fn=generate_scatter_plot,
-        task_radio=task_radio,
-        benchmark_radio=benchmark_radio,
-        model_type_dropdown=model_type_dropdown,
-        search_box=search_box,
-        params_slider=params_slider,
+        task_radio=task_radio_main,
+        benchmark_radio=benchmark_radio_main,
+        model_type_dropdown=model_type_dropdown_main,
+        search_box=search_box_main,
+        params_slider=params_slider_main,
         bubble_benchmark=bubble_benchmark,
         bubble_metric=bubble_metric,
         scatter_plot=scatter_plot,
-        leaderboard=leaderboard,
-        simulator_radio=simulator_radio,
+        leaderboard=leaderboard_main,
+        simulator_radio=simulator_radio_main,
         state=state,
+        name=name_main,
+    )
+
+    create_leaderboard_handlers(
+        filter_leaderboard_fn=filter_leaderboard,
+        generate_scatter_plot_fn=generate_scatter_plot,
+        task_radio=task_radio_other,
+        benchmark_radio=benchmark_radio_other,
+        model_type_dropdown=model_type_dropdown_other,
+        search_box=search_box_other,
+        params_slider=params_slider_other,
+        bubble_benchmark=bubble_benchmark,
+        bubble_metric=bubble_metric,
+        scatter_plot=scatter_plot,
+        leaderboard=leaderboard_other,
+        simulator_radio=simulator_radio_other,
+        state=state,
+        name=name_other,
     )
 
 
@@ -181,4 +238,4 @@ app.launch(
         "hpai_logo_grad.png",
         "bsc-logo.png",
     ]
-    )
+)
config/constants.py
CHANGED

@@ -4,6 +4,17 @@ VERILATOR_RESULTS = f"{RESULTS_DIR}/results_verilator.json"
 ICARUS_AGG = f"{RESULTS_DIR}/aggregated_scores_icarus.csv"
 VERILATOR_AGG = f"{RESULTS_DIR}/aggregated_scores_verilator.csv"
 
+
+DISCARDED_MODELS = {
+
+}
+
+"""
+"DeepSeek R1": "10/10/2025",
+"QwenCoder 2.5 7B": "11/10/2025",
+"RTLCoder Mistral": "14/10/2025"
+"""
+
 TASKS = ["Spec-to-RTL", "Code Completion", "Line Completion †"]
 S2R_BENCHMARKS = ["VerilogEval S2R", "RTLLM"]
 CC_BENCHMARKS = ["VerilogEval MC", "VeriGen"]
data_processing.py
CHANGED

@@ -10,6 +10,7 @@ from config.constants import (
     SCATTER_PLOT_X_TICKS,
     TYPE_COLORS,
     Y_AXIS_LIMITS,
+    DISCARDED_MODELS,
 )
 from utils import filter_bench, filter_bench_all, filter_RTLRepo, handle_special_cases
 
@@ -40,7 +41,7 @@ class Simulator:
 
 
 # filtering main function for the leaderboard body
-def filter_leaderboard(task, benchmark, model_type, search_query, max_params, state):
+def filter_leaderboard(task, benchmark, model_type, search_query, max_params, state, name):
     """Filter leaderboard data based on user selections."""
     subset = state.get_current_df().copy()
 
@@ -69,15 +70,20 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, state):
     max_params = float(max_params)
     subset = subset[subset["Params"] <= max_params]
 
+    if name == "Other Models":
+        subset = subset[subset["Model"].isin(DISCARDED_MODELS)]
+    else:
+        subset = subset[~subset["Model"].isin(DISCARDED_MODELS)]
+
     if benchmark == "All":
         if task == "Spec-to-RTL":
-            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg S2R")
+            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg S2R", name=name)
         elif task == "Code Completion":
-            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg MC")
+            return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg MC", name=name)
         elif task == "Line Completion †":
-            return filter_RTLRepo(subset)
+            return filter_RTLRepo(subset, name=name)
     elif benchmark == "RTL-Repo":
-        return filter_RTLRepo(subset)
+        return filter_RTLRepo(subset, name=name)
     else:
         agg_column = None
         if benchmark == "VerilogEval S2R":
@@ -89,7 +95,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, state):
         elif benchmark == "VeriGen":
             agg_column = "Agg VeriGen"
 
-        return filter_bench(subset, state.get_current_agg(), agg_column)
+        return filter_bench(subset, state.get_current_agg(), agg_column, name=name)
 
 
 def generate_scatter_plot(benchmark, metric, state):
@@ -97,6 +103,7 @@ def generate_scatter_plot(benchmark, metric, state):
     benchmark, metric = handle_special_cases(benchmark, metric)
 
     subset = state.get_current_df()[state.get_current_df()["Benchmark"] == benchmark]
+    subset = subset[~subset["Model"].isin(DISCARDED_MODELS)]
     if benchmark == "RTL-Repo":
         subset = subset[subset["Metric"].str.contains("EM", case=False, na=False)]
     detailed_scores = subset.groupby("Model", as_index=False)["Score"].mean()
handlers/leaderboard_handlers.py
CHANGED

@@ -26,6 +26,7 @@ def create_leaderboard_handlers(
     leaderboard,
     simulator_radio,
     state,
+    name,
 ):
     def update_benchmarks_by_task(task):
         if task == "Spec-to-RTL":
@@ -45,6 +46,7 @@ def create_leaderboard_handlers(
             search_box.value,
             params_slider.value,
             state,
+            name,
         )
         return gr.update(value=benchmark_value, choices=new_benchmarks), filtered
 
@@ -77,7 +79,7 @@ def create_leaderboard_handlers(
     ):
         state.set_simulator(simulator)
 
-        leaderboard_df = filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state)
+        leaderboard_df = filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state, name)
         fig = generate_scatter_plot_fn(plot_bench, plot_metric, state)
         return leaderboard_df, fig
 
@@ -88,7 +90,7 @@ def create_leaderboard_handlers(
     )
 
     def filter_with_state(task, benchmark, model_type, search, max_params):
-        return filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state)
+        return filter_leaderboard_fn(task, benchmark, model_type, search, max_params, state, name)
 
     benchmark_radio.change(
         fn=filter_with_state,
utils.py
CHANGED

@@ -5,8 +5,9 @@ import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
+import re
 
-from config.constants import COLUMN_MAPPINGS, COLUMN_ORDER, TYPE_EMOJI
+from config.constants import COLUMN_MAPPINGS, COLUMN_ORDER, TYPE_EMOJI, DISCARDED_MODELS
 
 
 def model_hyperlink(link, model_name, release, thinking=False):
@@ -20,6 +21,20 @@ def model_hyperlink(link, model_name, release, thinking=False):
     return ret + reasoning_badge + new_badge if thinking == "Reasoning" else ret + new_badge
 
 
+def extract_name_from_link(html: str) -> str:
+    """
+    Extracts the model name from the HTML generated by model_hyperlink()
+    """
+    if not isinstance(html, str):
+        return html
+
+    match = re.search(r'<a[^>]*>(.*?)</a>', html)
+    if match:
+        return match.group(1).strip()
+
+    return re.sub(r'<[^>]+>', '', html).strip()
+
+
 def handle_special_cases(benchmark, metric):
     if metric == "Exact Matching (EM)":
         benchmark = "RTL-Repo"
@@ -28,7 +43,7 @@ def handle_special_cases(benchmark, metric):
     return benchmark, metric
 
 
-def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
+def filter_RTLRepo(subset: pd.DataFrame, name: str = "") -> pd.DataFrame:
     if subset.empty:
         return pd.DataFrame(columns=["Type", "Model", "Params", "Exact Matching (EM)"])
 
@@ -42,6 +57,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
         "Model"
     )
     filtered_df = subset[["Model", "Score"]].rename(columns={"Score": "Exact Matching (EM)"})
+
    filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
     filtered_df["Model"] = filtered_df.apply(
         lambda row: model_hyperlink(
@@ -54,10 +70,13 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
     filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, ""))
     filtered_df = filtered_df[["Type", "Model", "Params", "Exact Matching (EM)"]]
     filtered_df = filtered_df.sort_values(by="Exact Matching (EM)", ascending=False).reset_index(drop=True)
+
+    if name == "Other Models":
+        filtered_df["Date Discarded"] = filtered_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A"))
     return filtered_df
 
 
-def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
+def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None, name: str = "") -> pd.DataFrame:
     if subset.empty:
         return pd.DataFrame(columns=COLUMN_ORDER)
 
@@ -85,6 +104,8 @@ def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
     # else: # fallback
     # pivot_df["Aggregated ⬆️"] = pivot_df.mean(axis=1, numeric_only=True).round(2)
 
+
+
     pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
     pivot_df["Model"] = pivot_df.apply(
         lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"], row["Thinking"]),
@@ -95,12 +116,16 @@ def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
     if all(col in pivot_df.columns for col in ["Power", "Performance", "Area"]):
         pivot_df["Post-Synthesis (PSQ)"] = pivot_df[["Power", "Performance", "Area"]].mean(axis=1).round(2)
 
+
     pivot_df.rename(columns=COLUMN_MAPPINGS, inplace=True)
     pivot_df = pivot_df[[col for col in COLUMN_ORDER if col in pivot_df.columns]]
 
     if "Functionality" in pivot_df.columns:
         pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True)
 
+    if name == "Other Models":
+        pivot_df["Date Discarded"] = pivot_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A"))
+
     return pivot_df
 
 
@@ -128,7 +153,7 @@ def custom_agg_cc(vals):
     return round(result, 2)
 
 
-def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
+def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None, name: str = "") -> pd.DataFrame:
     if subset.empty:
         return pd.DataFrame(columns=COLUMN_ORDER)
 
@@ -164,4 +189,7 @@ def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
     if "Functionality" in pivot_df.columns:
         pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True)
 
+    if name == "Other Models":
+        pivot_df["Date Discarded"] = pivot_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A"))
+
     return pivot_df