INVALSIbenchmark

Running

Andrea Seveso committed on Jun 26, 2024

Commit

3c69c38

1 Parent(s): 9dc8cf4

Add citation

Files changed (2) hide show

app.py CHANGED Viewed

@@ -152,7 +152,7 @@ def get_question_format_data():
     dataset.columns = columns
     dataset = dataset.round(1)
-    dataset = dataset.style.highlight_max(color='lightgreen', axis=0)
     return dataset

     dataset.columns = columns
     dataset = dataset.round(1)
+    # dataset = dataset.style.highlight_max(color='lightgreen', axis=0)
     return dataset

src/about.py CHANGED Viewed

@@ -22,9 +22,12 @@ NUM_FEWSHOT = 0  # Change with your few shot
 # Your leaderboard name
 TITLE = """<h1 align="center" id="space-title">👩‍🏫Invalsi Leaderboard
-<img src="https://huggingface.co/spaces/Crisp-Unimib/INVALSIbenchmark/resolve/main/src/logo-crisp-eng-retina.png" height="800" width="150" style="vertical-align: middle;">
 </h1>"""
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 Welcome into <a href="https://crispresearch.it/"><b>CRISP Bicocca</b></a> Invalsi Leaderboard page!
@@ -33,7 +36,7 @@ We adapted the INVALSI benchmark for automated LLM evaluation, which involves ri
 Researchers are invited to submit their models for ongoing evaluation, ensuring the benchmark remains a current and valuable resource.
-For more information on the benchmark, please refer to our arXiv paper <a href="https://arxiv.org/abs/SOON"><b>here</b></a> and read the "About" section.
 """
 # Which evaluations are you running? how can people reproduce what you have?
@@ -246,6 +249,15 @@ This is a leaderboard for Open LLMs, and we'd love for as many people as possibl
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
 """
 QUESTION_FORMAT_TEXT = """

 # Your leaderboard name
 TITLE = """<h1 align="center" id="space-title">👩‍🏫Invalsi Leaderboard
 </h1>"""
+"""
+<img src="https://huggingface.co/spaces/Crisp-Unimib/INVALSIbenchmark/resolve/main/src/logo-crisp-eng-retina.png" height="800" width="150" style="vertical-align: middle;">
+"""
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 Welcome into <a href="https://crispresearch.it/"><b>CRISP Bicocca</b></a> Invalsi Leaderboard page!
 Researchers are invited to submit their models for ongoing evaluation, ensuring the benchmark remains a current and valuable resource.
+For more information on the benchmark, please refer to our arXiv paper <a href="https://arxiv.org/abs/2406.17535"><b>here</b></a> and read the "About" section.
 """
 # Which evaluations are you running? how can people reproduce what you have?
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
+@misc{mercorio2024disceautdeficereevaluating,
+      title={Disce aut Deficere: Evaluating LLMs Proficiency on the INVALSI Italian Benchmark},
+      author={Fabio Mercorio and Mario Mezzanzanica and Daniele Potertì and Antonio Serino and Andrea Seveso},
+      year={2024},
+      eprint={2406.17535},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2406.17535},
+}
 """
 QUESTION_FORMAT_TEXT = """