Spaces:
Running
Running
Konstantin Chernyshev
committed on
Commit
·
ea5ca15
1
Parent(s):
f7d4dba
chore: remove ScatterPlot -s
Browse files
app.py
CHANGED
|
@@ -216,46 +216,46 @@ with demo:
|
|
| 216 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 217 |
with gr.TabItem("🏆 U-MATH", elem_id="u-math-benchmark-tab-table", id=0):
|
| 218 |
leaderboard_umath = init_leaderboard(LEADERBOARD_U_MATH_DF, U_MATH_COLUMNS_DICT)
|
| 219 |
-
gr.ScatterPlot(
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
)
|
| 229 |
|
| 230 |
with gr.TabItem("🏅 μ-MATH (Meta-Benchmark)", elem_id="mu-math-benchmark-tab-table", id=1):
|
| 231 |
leaderboard_mumath = init_leaderboard(LEADERBOARD_MU_MATH_DF, MU_MATH_COLUMNS_DICT)
|
| 232 |
-
gr.ScatterPlot(
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
)
|
| 242 |
|
| 243 |
with gr.TabItem("📊 U-MATH vs μ-MATH", elem_id="u-math-vs-mu-math-tab-table", id=2):
|
| 244 |
leaderboard_aggregated = init_leaderboard(LEADERBOARD_U_MATH_MU_MATH_JOINED_DF, U_MATH_AND_MU_MATH_COLUMNS_DICT)
|
| 245 |
-
gr.ScatterPlot(
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
)
|
| 259 |
|
| 260 |
with gr.TabItem("📝 About", elem_id="about-tab-table", id=3):
|
| 261 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
|
| 216 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 217 |
with gr.TabItem("🏆 U-MATH", elem_id="u-math-benchmark-tab-table", id=0):
|
| 218 |
leaderboard_umath = init_leaderboard(LEADERBOARD_U_MATH_DF, U_MATH_COLUMNS_DICT)
|
| 219 |
+
# gr.ScatterPlot(
|
| 220 |
+
# value=LEADERBOARD_U_MATH_DF,
|
| 221 |
+
# title="U-MATH: Text vs Visual Accuracy",
|
| 222 |
+
# x=U_MATH_COLUMNS_DICT["u_math_text_acc"].pretty_name,
|
| 223 |
+
# y=U_MATH_COLUMNS_DICT["u_math_visual_acc"].pretty_name,
|
| 224 |
+
# color=U_MATH_COLUMNS_DICT["model_family"].pretty_name,
|
| 225 |
+
# tooltip=[U_MATH_COLUMNS_DICT["full_model_name"].pretty_name, U_MATH_COLUMNS_DICT["u_math_acc"].pretty_name],
|
| 226 |
+
# elem_classes="scatter-plot",
|
| 227 |
+
# height=500,
|
| 228 |
+
# )
|
| 229 |
|
| 230 |
with gr.TabItem("🏅 μ-MATH (Meta-Benchmark)", elem_id="mu-math-benchmark-tab-table", id=1):
|
| 231 |
leaderboard_mumath = init_leaderboard(LEADERBOARD_MU_MATH_DF, MU_MATH_COLUMNS_DICT)
|
| 232 |
+
# gr.ScatterPlot(
|
| 233 |
+
# value=LEADERBOARD_MU_MATH_DF,
|
| 234 |
+
# title="μ-MATH: True Positive Rate (Recall) vs True Negative Rate (Specificity)",
|
| 235 |
+
# x=MU_MATH_COLUMNS_DICT["mu_math_tpr"].pretty_name,
|
| 236 |
+
# y=MU_MATH_COLUMNS_DICT["mu_math_tnr"].pretty_name,
|
| 237 |
+
# color=MU_MATH_COLUMNS_DICT["model_family"].pretty_name,
|
| 238 |
+
# tooltip=[MU_MATH_COLUMNS_DICT["full_model_name"].pretty_name, MU_MATH_COLUMNS_DICT["mu_math_f1"].pretty_name],
|
| 239 |
+
# elem_classes="scatter-plot",
|
| 240 |
+
# height=500,
|
| 241 |
+
# )
|
| 242 |
|
| 243 |
with gr.TabItem("📊 U-MATH vs μ-MATH", elem_id="u-math-vs-mu-math-tab-table", id=2):
|
| 244 |
leaderboard_aggregated = init_leaderboard(LEADERBOARD_U_MATH_MU_MATH_JOINED_DF, U_MATH_AND_MU_MATH_COLUMNS_DICT)
|
| 245 |
+
# gr.ScatterPlot(
|
| 246 |
+
# value=LEADERBOARD_U_MATH_MU_MATH_JOINED_DF,
|
| 247 |
+
# title="U-MATH Accuracy (Solving) vs μ-MATH F1 Score (Judging)",
|
| 248 |
+
# x=U_MATH_AND_MU_MATH_COLUMNS_DICT["u_math_acc"].pretty_name,
|
| 249 |
+
# y=U_MATH_AND_MU_MATH_COLUMNS_DICT["mu_math_f1"].pretty_name,
|
| 250 |
+
# color=U_MATH_AND_MU_MATH_COLUMNS_DICT["model_family"].pretty_name,
|
| 251 |
+
# tooltip=[
|
| 252 |
+
# U_MATH_AND_MU_MATH_COLUMNS_DICT["full_model_name"].pretty_name,
|
| 253 |
+
# U_MATH_AND_MU_MATH_COLUMNS_DICT["u_math_text_acc"].pretty_name,
|
| 254 |
+
# U_MATH_AND_MU_MATH_COLUMNS_DICT["u_math_visual_acc"].pretty_name,
|
| 255 |
+
# ],
|
| 256 |
+
# elem_classes="scatter-plot",
|
| 257 |
+
# height=500,
|
| 258 |
+
# )
|
| 259 |
|
| 260 |
with gr.TabItem("📝 About", elem_id="about-tab-table", id=3):
|
| 261 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|