add intro text
Files changed:
- app.py  +2 -2
- src/display/about.py  +6 -3
app.py CHANGED

@@ -133,7 +133,7 @@ with demo:
                     ],
                     leaderboard_table,
                 )
-                with gr.TabItem("
+                with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
                     with gr.Row():
                         search_bar = gr.Textbox(
                             placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",

@@ -168,7 +168,7 @@ with demo:
                     leaderboard_table,
                 )

-                with gr.TabItem("
+                with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
                     with gr.Row():
                         search_bar = gr.Textbox(
                             placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
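For context, here is a minimal, hypothetical sketch of how the two renamed benchmark tabs could be wired up inside a Gradio Blocks demo. The tab titles, elem_ids, and ids mirror the diff; everything else (the placeholder dataframes, variable names, and surrounding layout) is an assumption for illustration, not the Space's actual app.py.

```python
# Minimal sketch of a tabbed Gradio leaderboard layout (hypothetical, not the Space's actual code).
import gradio as gr
import pandas as pd

# Placeholder results; the real Space loads these from its evaluation pipeline.
df_m3exam = pd.DataFrame({"Model": ["model-a", "model-b"], "Avg": [61.2, 57.8]})
df_mmlu = pd.DataFrame({"Model": ["model-a", "model-b"], "Avg": [64.5, 59.1]})

with gr.Blocks() as demo:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Each benchmark gets its own tab, matching the titles and ids added in the diff.
        with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
            with gr.Row():
                search_bar_m3exam = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                )
            leaderboard_m3exam = gr.Dataframe(value=df_m3exam, interactive=False)

        with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
            with gr.Row():
                search_bar_mmlu = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                )
            leaderboard_mmlu = gr.Dataframe(value=df_mmlu, interactive=False)

demo.launch()
```

Giving each gr.TabItem a distinct id makes the tab addressable programmatically, while elem_id gives the Space's CSS a stable hook to style individual benchmark tabs.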
src/display/about.py CHANGED

@@ -16,12 +16,15 @@ class Tasks(Enum):
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title"
+TITLE = """<h1 align="center" id="space-title">🏆 SeaExam Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-
-
+This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human exam-type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).
+
+For additional details such as datasets, evaluation criteria, and reproducibility, please refer to the "📝 About" tab.
+
+Also check the [SeaBench leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaBench_leaderboard) - focusing on evaluating the model's ability to follow instructions in real-world multi-turn settings.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
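The two constants edited above are typically rendered near the top of app.py. Below is a small sketch assuming the usual Hugging Face leaderboard-template pattern; the import path matches this repo's layout, but the rendering calls are an assumption for illustration and are not part of this diff.

```python
# Hypothetical sketch: how TITLE and INTRODUCTION_TEXT are usually surfaced in app.py.
import gradio as gr

from src.display.about import INTRODUCTION_TEXT, TITLE

with gr.Blocks() as demo:
    gr.HTML(TITLE)  # the <h1> heading, rendered as raw HTML
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")  # intro paragraphs, rendered as Markdown
    # ... benchmark tabs (M3Exam, MMLU, ...) and leaderboard tables follow here ...

demo.launch()
```

Keeping TITLE and INTRODUCTION_TEXT in src/display/about.py keeps the user-facing copy in one place, so wording changes like this commit do not touch the app layout code.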