Spaces:
Running
Running
update leaderboard layout
Browse files
- app.py +5 -2
- static/css/style.css +39 -33
- utils.py +5 -3
app.py
CHANGED
|
@@ -52,6 +52,11 @@ with gr.Blocks(css=css) as block:
|
|
| 52 |
)
|
| 53 |
|
| 54 |
initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
data_component = gr.Dataframe(
|
| 56 |
value=initial_data,
|
| 57 |
headers=initial_headers,
|
|
@@ -76,5 +81,3 @@ with gr.Blocks(css=css) as block:
|
|
| 76 |
|
| 77 |
if __name__ == "__main__":
|
| 78 |
block.launch(share=True)
|
| 79 |
-
#block.launch(server_name="127.0.0.1", server_port=7860)
|
| 80 |
-
|
|
|
|
| 52 |
)
|
| 53 |
|
| 54 |
initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
|
| 55 |
+
gr.Markdown(
|
| 56 |
+
"**Table 1: MEGA-Bench full results.** <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> $\\text{Overall} \\ = \\ \\frac{\\max(\\text{Core w/o CoT}, \\ \\text{Core w/ CoT}) \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$",
|
| 57 |
+
elem_classes="table-caption",
|
| 58 |
+
latex_delimiters=[ {"left": "$", "right": "$", "display": False }],
|
| 59 |
+
)
|
| 60 |
data_component = gr.Dataframe(
|
| 61 |
value=initial_data,
|
| 62 |
headers=initial_headers,
|
|
|
|
| 81 |
|
| 82 |
if __name__ == "__main__":
|
| 83 |
block.launch(share=True)
|
|
|
|
|
|
static/css/style.css
CHANGED
|
@@ -26,74 +26,80 @@
|
|
| 26 |
|
| 27 |
/* Light mode styles */
|
| 28 |
.custom-dataframe {
|
| 29 |
-
color:
|
| 30 |
-
background-color:
|
| 31 |
}
|
| 32 |
|
| 33 |
.custom-dataframe thead th {
|
| 34 |
-
background-color:
|
| 35 |
-
color:
|
| 36 |
}
|
| 37 |
|
| 38 |
.custom-dataframe tbody td {
|
| 39 |
-
background-color:
|
| 40 |
-
color:
|
| 41 |
}
|
| 42 |
|
| 43 |
-
.custom-dataframe thead th:nth-child(-n+
|
| 44 |
-
.custom-dataframe tbody td:nth-child(-n+
|
| 45 |
-
background-color:
|
| 46 |
}
|
| 47 |
|
| 48 |
-
.custom-dataframe thead th:nth-child(n+
|
| 49 |
-
.custom-dataframe tbody td:nth-child(n+
|
| 50 |
-
background-color:
|
| 51 |
}
|
| 52 |
|
| 53 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+
|
| 54 |
-
background-color:
|
| 55 |
}
|
| 56 |
|
| 57 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+
|
| 58 |
-
background-color:
|
| 59 |
}
|
| 60 |
|
| 61 |
/* Dark mode styles */
|
| 62 |
@media (prefers-color-scheme: dark) {
|
| 63 |
.custom-dataframe {
|
| 64 |
-
color:
|
| 65 |
-
background-color:
|
| 66 |
}
|
| 67 |
|
| 68 |
.custom-dataframe thead th {
|
| 69 |
-
background-color:
|
| 70 |
-
color:
|
| 71 |
}
|
| 72 |
|
| 73 |
.custom-dataframe tbody td {
|
| 74 |
-
background-color:
|
| 75 |
-
color:
|
| 76 |
}
|
| 77 |
|
| 78 |
-
.custom-dataframe thead th:nth-child(-n+
|
| 79 |
-
.custom-dataframe tbody td:nth-child(-n+
|
| 80 |
-
background-color:
|
| 81 |
}
|
| 82 |
|
| 83 |
-
.custom-dataframe thead th:nth-child(n+
|
| 84 |
-
.custom-dataframe tbody td:nth-child(n+
|
| 85 |
-
background-color:
|
| 86 |
}
|
| 87 |
|
| 88 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+
|
| 89 |
-
background-color:
|
| 90 |
}
|
| 91 |
|
| 92 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+
|
| 93 |
-
background-color:
|
| 94 |
}
|
| 95 |
|
| 96 |
.custom-dataframe tbody tr:hover td {
|
| 97 |
-
background-color:
|
| 98 |
}
|
| 99 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
/* Light mode styles */
|
| 28 |
.custom-dataframe {
|
| 29 |
+
color: var(--text-color);
|
| 30 |
+
background-color: var(--background-color);
|
| 31 |
}
|
| 32 |
|
| 33 |
.custom-dataframe thead th {
|
| 34 |
+
background-color: var(--header-background) !important;
|
| 35 |
+
color: var(--text-color) !important;
|
| 36 |
}
|
| 37 |
|
| 38 |
.custom-dataframe tbody td {
|
| 39 |
+
background-color: var(--background-color) !important;
|
| 40 |
+
color: var(--text-color) !important;
|
| 41 |
}
|
| 42 |
|
| 43 |
+
.custom-dataframe thead th:nth-child(-n+5),
|
| 44 |
+
.custom-dataframe tbody td:nth-child(-n+5) {
|
| 45 |
+
background-color: var(--global-column-background) !important;
|
| 46 |
}
|
| 47 |
|
| 48 |
+
.custom-dataframe thead th:nth-child(n+6),
|
| 49 |
+
.custom-dataframe tbody td:nth-child(n+6) {
|
| 50 |
+
background-color: var(--dimension-column-background) !important;
|
| 51 |
}
|
| 52 |
|
| 53 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
|
| 54 |
+
background-color: var(--row-even-global) !important;
|
| 55 |
}
|
| 56 |
|
| 57 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
|
| 58 |
+
background-color: var(--row-even-dimension) !important;
|
| 59 |
}
|
| 60 |
|
| 61 |
/* Dark mode styles */
|
| 62 |
@media (prefers-color-scheme: dark) {
|
| 63 |
.custom-dataframe {
|
| 64 |
+
color: var(--text-color) !important;
|
| 65 |
+
background-color: var(--background-color) !important;
|
| 66 |
}
|
| 67 |
|
| 68 |
.custom-dataframe thead th {
|
| 69 |
+
background-color: var(--header-background) !important;
|
| 70 |
+
color: var(--text-color) !important;
|
| 71 |
}
|
| 72 |
|
| 73 |
.custom-dataframe tbody td {
|
| 74 |
+
background-color: var(--background-color) !important;
|
| 75 |
+
color: var(--text-color) !important;
|
| 76 |
}
|
| 77 |
|
| 78 |
+
.custom-dataframe thead th:nth-child(-n+5),
|
| 79 |
+
.custom-dataframe tbody td:nth-child(-n+5) {
|
| 80 |
+
background-color: var(--global-column-background) !important;
|
| 81 |
}
|
| 82 |
|
| 83 |
+
.custom-dataframe thead th:nth-child(n+6),
|
| 84 |
+
.custom-dataframe tbody td:nth-child(n+6) {
|
| 85 |
+
background-color: var(--dimension-column-background) !important;
|
| 86 |
}
|
| 87 |
|
| 88 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
|
| 89 |
+
background-color: var(--row-even-global) !important;
|
| 90 |
}
|
| 91 |
|
| 92 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
|
| 93 |
+
background-color: var(--row-even-dimension) !important;
|
| 94 |
}
|
| 95 |
|
| 96 |
.custom-dataframe tbody tr:hover td {
|
| 97 |
+
background-color: var(--hover-background) !important;
|
| 98 |
}
|
| 99 |
}
|
| 100 |
+
|
| 101 |
+
.table-caption {
|
| 102 |
+
text-align: center;
|
| 103 |
+
margin-top: 10px;
|
| 104 |
+
color: var(--text-color);
|
| 105 |
+
}
|
utils.py
CHANGED
|
@@ -121,11 +121,13 @@ def get_df(selected_super_group, selected_model_group):
|
|
| 121 |
for model in MODEL_GROUPS[selected_model_group]:
|
| 122 |
model_data = MODEL_DATA[model]
|
| 123 |
summary = SUMMARY_DATA[model]
|
| 124 |
-
|
|
|
|
| 125 |
row = {
|
| 126 |
"Models": get_display_model_name(model), # Use the mapped name
|
| 127 |
"Overall": round(summary["overall_score"] * 100, 2),
|
| 128 |
-
"Core": round(
|
|
|
|
| 129 |
"Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
|
| 130 |
}
|
| 131 |
for keyword in SUPER_GROUPS[selected_super_group]:
|
|
@@ -142,6 +144,6 @@ def get_df(selected_super_group, selected_model_group):
|
|
| 142 |
|
| 143 |
def get_leaderboard_data(selected_super_group, selected_model_group):
|
| 144 |
df = get_df(selected_super_group, selected_model_group)
|
| 145 |
-
headers = ["Models", "Overall", "Core", "Open-ended"] + SUPER_GROUPS[selected_super_group]
|
| 146 |
data = df[headers].values.tolist()
|
| 147 |
return headers, data
|
|
|
|
| 121 |
for model in MODEL_GROUPS[selected_model_group]:
|
| 122 |
model_data = MODEL_DATA[model]
|
| 123 |
summary = SUMMARY_DATA[model]
|
| 124 |
+
core_noncot_score = summary["core_noncot"]["macro_mean_score"]
|
| 125 |
+
core_cot_score = summary["core_cot"]["macro_mean_score"]
|
| 126 |
row = {
|
| 127 |
"Models": get_display_model_name(model), # Use the mapped name
|
| 128 |
"Overall": round(summary["overall_score"] * 100, 2),
|
| 129 |
+
"Core(w/o CoT)": round(core_noncot_score * 100, 2),
|
| 130 |
+
"Core(w/ CoT)": round(core_cot_score * 100, 2),
|
| 131 |
"Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
|
| 132 |
}
|
| 133 |
for keyword in SUPER_GROUPS[selected_super_group]:
|
|
|
|
| 144 |
|
| 145 |
def get_leaderboard_data(selected_super_group, selected_model_group):
|
| 146 |
df = get_df(selected_super_group, selected_model_group)
|
| 147 |
+
headers = ["Models", "Overall", "Core(w/o CoT)", "Core(w/ CoT)", "Open-ended"] + SUPER_GROUPS[selected_super_group]
|
| 148 |
data = df[headers].values.tolist()
|
| 149 |
return headers, data
|