Merge branch 'main' of https://huggingface.co/spaces/HPAI-BSC/TuRTLe-Leaderboard
- about.py +9 -8
- app.py +16 -11
- css_html_js.py +1 -1
about.py
CHANGED
@@ -1,9 +1,10 @@
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""@misc{
-
-
-
-
-
-}
-"""
+CITATION_BUTTON_TEXT = r"""@misc{garciagasulla2025turtleunifiedevaluationllms,
+      title={TuRTLe: A Unified Evaluation of LLMs for RTL Generation},
+      author={Dario Garcia-Gasulla and Gokcen Kestor and Emanuele Parisi and Miquel Albert\'i-Binimelis and Cristian Gutierrez and Razine Moundir Ghorab and Orlando Montenegro and Bernat Homs and Miquel Moreto},
+      year={2025},
+      eprint={2504.01986},
+      archivePrefix={arXiv},
+      primaryClass={cs.AR},
+      url={https://arxiv.org/abs/2504.01986},
+}"""
app.py
CHANGED
@@ -38,7 +38,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
 
    if model_type != 'All':
        # without emojis
-       subset = subset[subset['Model Type'] == model_type]
+       subset = subset[subset['Model Type'] == model_type.split(" ")[0]]
    if search_query:
        subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
    max_params = float(max_params)
@@ -149,7 +149,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
    lc_benchs = ["RTL-Repo"]
    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
    rtl_metrics = ["Exact Matching (EM)"]
-   model_types = ['All', 'General', 'Coding', 'RTL-Specific']
+   model_types = ['All', 'General 🟢', 'Coding 🔵', 'RTL-Specific 🔴']
 
    gr.HTML("""
    <p align="center" style="margin-bottom: -10px;">
@@ -160,7 +160,9 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
    <script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
    <div style="text-align: center; margin-bottom: 15px;">
-   <p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard!
+   <p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard! TuRTLe is a <b>unified evaluation framework designed to systematically assess Large Language Models (LLMs) in RTL (Register-Transfer Level) generation</b> for hardware design.
+   Evaluation criteria include <b>syntax correctness, functional accuracy, synthesizability, and post-synthesis quality</b> (PPA: Power, Performance, Area). TuRTLe integrates multiple benchmarks to highlight strengths and weaknesses of available LLMs.
+   Use the filters below to explore different RTL benchmarks and models.</p>
    <a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
    <button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
    GitHub Repo
@@ -192,21 +194,24 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
        benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
 
        with gr.Row(equal_height=True):
-
-
-
-
+           search_box = gr.Textbox(
+               label="Search Model",
+               placeholder="Type model name...",
+               scale=2,
+           )
+           model_type_dropdown = gr.Radio(
                choices=model_types,
                label="Select Model Type",
-               value='All'
+               value='All',
+               scale=3,
            )
-           with gr.Column():
            params_slider = gr.Slider(
                minimum=df['Params'].min(),
                maximum=700,
                value=700,
                label="Max Params",
-               step=1
+               step=1,
+               scale=2,
            )
 
        leaderboard = gr.DataFrame(
@@ -218,7 +223,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
            interactive=False,
            column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"])
 
-       with gr.Tab("
+       with gr.Tab("Plot View"):
            with gr.Row(equal_height=True):
                default_benchmark = s2r_benchs[0]
                bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
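
The first two hunks work as a pair: the Radio labels gain emoji suffixes ('General 🟢', 'Coding 🔵', 'RTL-Specific 🔴'), while the dataframe's 'Model Type' column evidently still stores the plain names, so filter_leaderboard now strips everything after the first space with model_type.split(" ")[0] before comparing. A minimal sketch of that behavior, with a toy dataframe standing in for the Space's real data (the rows below are assumptions for illustration only):

import pandas as pd

# Hypothetical stand-in for the leaderboard dataframe; the real Space
# builds df elsewhere, so these rows are illustrative only.
df = pd.DataFrame({
    "Model": ["model-a", "model-b", "model-c"],
    "Model Type": ["General", "Coding", "RTL-Specific"],
})

model_type = "RTL-Specific 🔴"  # value as shown in the Radio widget
if model_type != "All":
    # split(" ")[0] drops the emoji suffix, restoring the plain name
    subset = df[df["Model Type"] == model_type.split(" ")[0]]
print(subset)  # only the 'model-c' row remains

Note that split(" ")[0] would also truncate any label whose plain part contains a space; that is safe here because none of the four choices has a multi-word prefix.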
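The rebuilt filter row also drops the old with gr.Column(): wrappers: the three controls now sit directly in the gr.Row, sized by Gradio's scale argument, which distributes width among siblings of a Row in proportion to their values (2 : 3 : 2 here, so the Model Type radio gets the widest slot). A stripped-down sketch of that layout pattern, with data loading and event wiring omitted:

import gradio as gr

with gr.Blocks() as demo:
    # scale is relative within the Row: widths split 2 : 3 : 2
    with gr.Row(equal_height=True):
        search_box = gr.Textbox(label="Search Model",
                                placeholder="Type model name...", scale=2)
        model_type = gr.Radio(choices=["All", "General 🟢", "Coding 🔵", "RTL-Specific 🔴"],
                              label="Select Model Type", value="All", scale=3)
        params = gr.Slider(minimum=0, maximum=700, value=700,
                           label="Max Params", step=1, scale=2)

demo.launch()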
css_html_js.py
CHANGED
@@ -107,7 +107,7 @@ custom_css = """
    border: 0;
 }
 .slider_input_container {
-   padding-top:
+   padding-top: 2px;
 }
 input[role="listbox"] {
    cursor: pointer !important;