update space
Browse files
- app.py +4 -4
- src/about.py +17 -17
app.py
CHANGED
|
@@ -106,16 +106,16 @@ with demo:
|
|
| 106 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
| 107 |
|
| 108 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 109 |
-
with gr.TabItem("EN", elem_id="llm-benchmark-tab-table", id=1):
|
| 110 |
-
|
| 111 |
# with gr.TabItem("ZH", elem_id="llm-benchmark-tab-table", id=2):
|
| 112 |
# gr.Markdown(LLM_BENCHMARKS_TEXT_ZH, elem_classes="markdown-text")
|
| 113 |
|
| 114 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
|
| 115 |
with gr.Column():
|
| 116 |
with gr.Row():
|
| 117 |
-
with gr.TabItem("EN", elem_id="llm-benchmark-tab-table", id=1):
|
| 118 |
-
|
| 119 |
# with gr.TabItem("ZH", elem_id="llm-benchmark-tab-table", id=2):
|
| 120 |
# gr.Markdown(EVALUATION_QUEUE_TEXT_ZH, elem_classes="markdown-text")
|
| 121 |
|
|
|
|
| 106 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
| 107 |
|
| 108 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 109 |
+
# with gr.TabItem("EN", elem_id="llm-benchmark-tab-table", id=1):
|
| 110 |
+
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 111 |
# with gr.TabItem("ZH", elem_id="llm-benchmark-tab-table", id=2):
|
| 112 |
# gr.Markdown(LLM_BENCHMARKS_TEXT_ZH, elem_classes="markdown-text")
|
| 113 |
|
| 114 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
|
| 115 |
with gr.Column():
|
| 116 |
with gr.Row():
|
| 117 |
+
# with gr.TabItem("EN", elem_id="llm-benchmark-tab-table", id=1):
|
| 118 |
+
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
| 119 |
# with gr.TabItem("ZH", elem_id="llm-benchmark-tab-table", id=2):
|
| 120 |
# gr.Markdown(EVALUATION_QUEUE_TEXT_ZH, elem_classes="markdown-text")
|
| 121 |
|
src/about.py
CHANGED
|
@@ -12,23 +12,23 @@ class Task:
|
|
| 12 |
# ---------------------------------------------------
|
| 13 |
class Tasks(Enum):
|
| 14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 15 |
-
task0 = Task("mmmlu", "acc", "MMMLU")
|
| 16 |
# task1 = Task("mmlu", "acc", "MMLU")
|
| 17 |
# task2 = Task("cmmlu", "acc", "CMMLU")
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
NUM_FEWSHOT = 5 # Change with your few shot
|
| 33 |
# ---------------------------------------------------
|
| 34 |
|
|
@@ -148,7 +148,7 @@ We also thank [Institution1 Placeholder], [Institution2 Placeholder], [Organizat
|
|
| 148 |
|
| 149 |
We would like to create a leaderboard as diverse as possible, reach out if you would like us to include your evaluation dataset!
|
| 150 |
|
| 151 |
-
Comments and suggestions are more than welcome! Visit the [👏 Community](<Community Page Placeholder>) page, tell us what you think about
|
| 152 |
|
| 153 |
Thank you very much! 💛
|
| 154 |
|
|
@@ -256,7 +256,7 @@ MMMLU 排行榜旨在为比较 AI 模型在这些多语言和多领域中的表
|
|
| 256 |
|
| 257 |
我们希望创建一个尽可能多样化的排行榜,欢迎联系我们如果你希望我们将你的评估数据集包含在内!
|
| 258 |
|
| 259 |
-
评论和建议非常欢迎!请访问 [👏 社区](<Community Page Placeholder>) 页面,告诉我们你对
|
| 260 |
|
| 261 |
非常感谢! 💛
|
| 262 |
"""
|
|
|
|
| 12 |
# ---------------------------------------------------
|
| 13 |
class Tasks(Enum):
|
| 14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 15 |
+
# task0 = Task("mmmlu", "acc", "MMMLU")
|
| 16 |
# task1 = Task("mmlu", "acc", "MMLU")
|
| 17 |
# task2 = Task("cmmlu", "acc", "CMMLU")
|
| 18 |
+
mmmlu_ar = Task("mmmlu_ar", "acc", "MMMLU_AR")
|
| 19 |
+
mmmlu_bn = Task("mmmlu_bn", "acc", "MMMLU_BN")
|
| 20 |
+
mmmlu_de = Task("mmmlu_de", "acc", "MMMLU_DE")
|
| 21 |
+
mmmlu_es = Task("mmmlu_es", "acc", "MMMLU_ES")
|
| 22 |
+
mmmlu_fr = Task("mmmlu_fr", "acc", "MMMLU_FR")
|
| 23 |
+
mmmlu_hi = Task("mmmlu_hi", "acc", "MMMLU_HI")
|
| 24 |
+
mmmlu_id = Task("mmmlu_id", "acc", "MMMLU_ID")
|
| 25 |
+
mmmlu_it = Task("mmmlu_it", "acc", "MMMLU_IT")
|
| 26 |
+
mmmlu_ja = Task("mmmlu_ja", "acc", "MMMLU_JA")
|
| 27 |
+
mmmlu_ko = Task("mmmlu_ko", "acc", "MMMLU_KO")
|
| 28 |
+
mmmlu_pt = Task("mmmlu_pt", "acc", "MMMLU_PT")
|
| 29 |
+
mmmlu_sw = Task("mmmlu_sw", "acc", "MMMLU_SW")
|
| 30 |
+
mmmlu_yo = Task("mmmlu_yo", "acc", "MMMLU_YO")
|
| 31 |
+
mmmlu_zh = Task("mmmlu_zh", "acc", "MMMLU_ZH")
|
| 32 |
NUM_FEWSHOT = 5 # Change with your few shot
|
| 33 |
# ---------------------------------------------------
|
| 34 |
|
|
|
|
| 148 |
|
| 149 |
We would like to create a leaderboard as diverse as possible, reach out if you would like us to include your evaluation dataset!
|
| 150 |
|
| 151 |
+
Comments and suggestions are more than welcome! Visit the [👏 Community](<Community Page Placeholder>) page, tell us what you think about MMMLU Leaderboard and how we can improve it, or go ahead and open a PR!
|
| 152 |
|
| 153 |
Thank you very much! 💛
|
| 154 |
|
|
|
|
| 256 |
|
| 257 |
我们希望创建一个尽可能多样化的排行榜,欢迎联系我们如果你希望我们将你的评估数据集包含在内!
|
| 258 |
|
| 259 |
+
评论和建议非常欢迎!请访问 [👏 社区](<Community Page Placeholder>) 页面,告诉我们你对 MMMLU 排行榜的看法以及我们如何改进,或者直接打开一个 PR!
|
| 260 |
|
| 261 |
非常感谢! 💛
|
| 262 |
"""
|