add intro text
Files changed:
- app.py  +2 -2
- src/display/about.py  +6 -3
app.py CHANGED

@@ -133,7 +133,7 @@ with demo:
                     ],
                     leaderboard_table,
                 )
-                with gr.TabItem("
+                with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
                     with gr.Row():
                         search_bar = gr.Textbox(
                             placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",

@@ -168,7 +168,7 @@ with demo:
                     leaderboard_table,
                 )

-                with gr.TabItem("
+                with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
                     with gr.Row():
                         search_bar = gr.Textbox(
                             placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
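For context, here is a minimal, hypothetical sketch of how the two renamed benchmark tabs could be wired up inside a Gradio Blocks demo. The tab titles, elem_ids, and ids mirror the diff; everything else (the placeholder dataframes, variable names, and surrounding layout) is an assumption for illustration, not the Space's actual app.py.

```python
# Minimal sketch of a tabbed Gradio leaderboard layout (hypothetical, not the Space's actual code).
import gradio as gr
import pandas as pd

# Placeholder results; the real Space loads these from its evaluation pipeline.
df_m3exam = pd.DataFrame({"Model": ["model-a", "model-b"], "Avg": [61.2, 57.8]})
df_mmlu = pd.DataFrame({"Model": ["model-a", "model-b"], "Avg": [64.5, 59.1]})

with gr.Blocks() as demo:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Each benchmark gets its own tab, matching the titles and ids added in the diff.
        with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
            with gr.Row():
                search_bar_m3exam = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                )
            leaderboard_m3exam = gr.Dataframe(value=df_m3exam, interactive=False)

        with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
            with gr.Row():
                search_bar_mmlu = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                )
            leaderboard_mmlu = gr.Dataframe(value=df_mmlu, interactive=False)

demo.launch()
```

Giving each gr.TabItem a distinct id makes the tab addressable programmatically, while elem_id gives the Space's CSS a stable hook to style individual benchmark tabs.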
src/display/about.py CHANGED

@@ -16,12 +16,15 @@ class Tasks(Enum):
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title"
+TITLE = """<h1 align="center" id="space-title">🏆 SeaExam Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-
-
+This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human exam-type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).
+
+For additional details such as datasets, evaluation criteria, and reproducibility, please refer to the "📝 About" tab.
+
+Also check the [SeaBench leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaBench_leaderboard) - focusing on evaluating the model's ability to follow instructions in real-world multi-turn settings.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
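The two constants edited above are typically rendered near the top of app.py. Below is a small sketch assuming the usual Hugging Face leaderboard-template pattern; the import path matches this repo's layout, but the rendering calls are an assumption for illustration and are not part of this diff.

```python
# Hypothetical sketch: how TITLE and INTRODUCTION_TEXT are usually surfaced in app.py.
import gradio as gr

from src.display.about import INTRODUCTION_TEXT, TITLE

with gr.Blocks() as demo:
    gr.HTML(TITLE)  # the <h1> heading, rendered as raw HTML
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")  # intro paragraphs, rendered as Markdown
    # ... benchmark tabs (M3Exam, MMLU, ...) and leaderboard tables follow here ...

demo.launch()
```

Keeping TITLE and INTRODUCTION_TEXT in src/display/about.py keeps the user-facing copy in one place, so wording changes like this commit do not touch the app layout code.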