Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -107,7 +107,7 @@ def init_leaderboard(dataframe):
|
|
| 107 |
interactive=False,
|
| 108 |
)
|
| 109 |
|
| 110 |
-
|
| 111 |
|
| 112 |
demo = gr.Blocks(css=custom_css)
|
| 113 |
with demo:
|
|
@@ -121,7 +121,9 @@ with demo:
|
|
| 121 |
|
| 122 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 123 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
|
| 126 |
with gr.Column():
|
| 127 |
with gr.Row():
|
|
@@ -151,40 +153,59 @@ with demo:
|
|
| 151 |
|
| 152 |
link_input = gr.Textbox(label="Link (str)", placeholder="请输入链接")
|
| 153 |
|
| 154 |
-
submit_button = gr.Button("Submit Eval")
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
| 187 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
# with gr.Column():
|
| 189 |
# with gr.Accordion(
|
| 190 |
# f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
|
|
|
|
| 107 |
interactive=False,
|
| 108 |
)
|
| 109 |
|
| 110 |
+
all_submissions = []
|
| 111 |
|
| 112 |
demo = gr.Blocks(css=custom_css)
|
| 113 |
with demo:
|
|
|
|
| 121 |
|
| 122 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 123 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
|
| 127 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
|
| 128 |
with gr.Column():
|
| 129 |
with gr.Row():
|
|
|
|
| 153 |
|
| 154 |
link_input = gr.Textbox(label="Link (str)", placeholder="请输入链接")
|
| 155 |
|
| 156 |
+
# Buttons for queueing a single entry vs. uploading the whole queue.
submit_button = gr.Button("Submit One Eval")
submit_all_button = gr.Button("Submit All")

# Markdown panel that displays queued submissions / upload status.
# Typo fix in the user-facing heading: "Submited" -> "Submitted".
submission_result = gr.Markdown("## Submitted results")
|
| 160 |
+
|
| 161 |
+
def submit_eval(score, name, base_model, env, target_research, subset, link):
    """Queue a single leaderboard submission and return the updated preview text.

    Builds one submission record in state "Checking", appends it to the
    module-level ``all_submissions`` queue (uploaded later by ``submit_all``),
    and renders every queued submission as one JSON object per line for the
    ``submission_result`` Markdown widget.

    Args:
        score: Reported score for the run.
        name: Display name of the submission.
        base_model: Base model identifier.
        env: Environment the run was evaluated in.
        target_research: Target-research category selected in the form.
        subset: Benchmark subset name.
        link: URL pointing at the submission artifacts.

    Returns:
        A newline-joined string of JSON-serialized queued submissions.
    """
    # Handle a single-record submission (keys mirror the leaderboard columns).
    result = {
        "Score": score,
        "Name": name,
        "BaseModel": base_model,
        "Env": env,
        "Target-research": target_research,
        "Subset": subset,
        "Link": link,
        "State": "Checking",
    }
    # Accumulate into the shared module-level queue.
    all_submissions.append(result)
    # Refresh the on-page preview: one JSON object per line.
    display_text = "\n".join([json.dumps(submission) for submission in all_submissions])
    return display_text
|
| 178 |
+
|
| 179 |
+
def submit_all():
    """Upload every queued submission to the leaderboard Space, then reset the queue.

    Serializes the module-level ``all_submissions`` list to a local JSON file,
    pushes that file into the Space repository's checking queue via the
    Hugging Face Hub API, and empties the queue on success.

    Returns:
        A status message for the ``submission_result`` Markdown widget.
    """
    # Persist all queued results in one shot.
    out_path = "test-output.json"
    with open(out_path, "w") as f:
        f.write(json.dumps(all_submissions))

    print("Uploading eval file")
    API.upload_file(
        path_or_fileobj=out_path,
        path_in_repo=out_path,
        repo_id="microsoft/MageBench-Leaderboard",
        repo_type="space",
        # Plain string: the original was an f-string with no placeholders.
        commit_message="Add submissions to checking queue",
    )
    # BUG FIX: the original `all_submissions = []` made the name local to this
    # function, so the `json.dumps(all_submissions)` above raised
    # UnboundLocalError before anything was written. clear() empties the
    # shared module-level list in place without rebinding the name.
    all_submissions.clear()
    return "All submissions uploaded successfully!"
|
| 195 |
+
|
| 196 |
+
# Single-submission button: collect the form fields and queue one entry.
submit_button.click(
    fn=submit_eval,
    inputs=[
        score_input,
        name_input,
        base_model_input,
        env_dropdown,
        target_research_dropdown,
        subset_dropdown,
        link_input,
    ],
    outputs=submission_result,
)

# Bulk-upload button: takes no inputs, pushes the accumulated queue.
submit_all_button.click(fn=submit_all, inputs=[], outputs=submission_result)
|
| 209 |
# with gr.Column():
|
| 210 |
# with gr.Accordion(
|
| 211 |
# f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
|