Spaces:
Running
Running
Andrea Seveso
committed on
Commit
·
3c69c38
1
Parent(s):
9dc8cf4
Add citation
Browse files
- app.py +1 -1
- src/about.py +14 -2
app.py
CHANGED
|
@@ -152,7 +152,7 @@ def get_question_format_data():
|
|
| 152 |
dataset.columns = columns
|
| 153 |
dataset = dataset.round(1)
|
| 154 |
|
| 155 |
-
dataset = dataset.style.highlight_max(color='lightgreen', axis=0)
|
| 156 |
return dataset
|
| 157 |
|
| 158 |
|
|
|
|
| 152 |
dataset.columns = columns
|
| 153 |
dataset = dataset.round(1)
|
| 154 |
|
| 155 |
+
# dataset = dataset.style.highlight_max(color='lightgreen', axis=0)
|
| 156 |
return dataset
|
| 157 |
|
| 158 |
|
src/about.py
CHANGED
|
@@ -22,9 +22,12 @@ NUM_FEWSHOT = 0 # Change with your few shot
|
|
| 22 |
|
| 23 |
# Your leaderboard name
|
| 24 |
TITLE = """<h1 align="center" id="space-title">👩🏫Invalsi Leaderboard
|
| 25 |
-
<img src="https://huggingface.co/spaces/Crisp-Unimib/INVALSIbenchmark/resolve/main/src/logo-crisp-eng-retina.png" height="800" width="150" style="vertical-align: middle;">
|
| 26 |
</h1>"""
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# What does your leaderboard evaluate?
|
| 29 |
INTRODUCTION_TEXT = """
|
| 30 |
Welcome into <a href="https://crispresearch.it/"><b>CRISP Bicocca</b></a> Invalsi Leaderboard page!
|
|
@@ -33,7 +36,7 @@ We adapted the INVALSI benchmark for automated LLM evaluation, which involves ri
|
|
| 33 |
|
| 34 |
Researchers are invited to submit their models for ongoing evaluation, ensuring the benchmark remains a current and valuable resource.
|
| 35 |
|
| 36 |
-
For more information on the benchmark, please refer to our arXiv paper <a href="https://arxiv.org/abs/
|
| 37 |
"""
|
| 38 |
|
| 39 |
# Which evaluations are you running? how can people reproduce what you have?
|
|
@@ -246,6 +249,15 @@ This is a leaderboard for Open LLMs, and we'd love for as many people as possibl
|
|
| 246 |
|
| 247 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
| 248 |
CITATION_BUTTON_TEXT = r"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
"""
|
| 250 |
|
| 251 |
QUESTION_FORMAT_TEXT = """
|
|
|
|
| 22 |
|
| 23 |
# Your leaderboard name
|
| 24 |
TITLE = """<h1 align="center" id="space-title">👩🏫Invalsi Leaderboard
|
|
|
|
| 25 |
</h1>"""
|
| 26 |
|
| 27 |
+
"""
|
| 28 |
+
<img src="https://huggingface.co/spaces/Crisp-Unimib/INVALSIbenchmark/resolve/main/src/logo-crisp-eng-retina.png" height="800" width="150" style="vertical-align: middle;">
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
# What does your leaderboard evaluate?
|
| 32 |
INTRODUCTION_TEXT = """
|
| 33 |
Welcome into <a href="https://crispresearch.it/"><b>CRISP Bicocca</b></a> Invalsi Leaderboard page!
|
|
|
|
| 36 |
|
| 37 |
Researchers are invited to submit their models for ongoing evaluation, ensuring the benchmark remains a current and valuable resource.
|
| 38 |
|
| 39 |
+
For more information on the benchmark, please refer to our arXiv paper <a href="https://arxiv.org/abs/2406.17535"><b>here</b></a> and read the "About" section.
|
| 40 |
"""
|
| 41 |
|
| 42 |
# Which evaluations are you running? how can people reproduce what you have?
|
|
|
|
| 249 |
|
| 250 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
| 251 |
CITATION_BUTTON_TEXT = r"""
|
| 252 |
+
@misc{mercorio2024disceautdeficereevaluating,
|
| 253 |
+
title={Disce aut Deficere: Evaluating LLMs Proficiency on the INVALSI Italian Benchmark},
|
| 254 |
+
author={Fabio Mercorio and Mario Mezzanzanica and Daniele Potertì and Antonio Serino and Andrea Seveso},
|
| 255 |
+
year={2024},
|
| 256 |
+
eprint={2406.17535},
|
| 257 |
+
archivePrefix={arXiv},
|
| 258 |
+
primaryClass={cs.CL},
|
| 259 |
+
url={https://arxiv.org/abs/2406.17535},
|
| 260 |
+
}
|
| 261 |
"""
|
| 262 |
|
| 263 |
QUESTION_FORMAT_TEXT = """
|