Spaces:
Running
Running
update
Browse files
- constants.py +3 -0
- static/eval_results/Default/self_reported.json +23 -6
- utils.py +13 -4
constants.py
CHANGED
|
@@ -261,6 +261,9 @@ MODEL_URLS = {
|
|
| 261 |
"InternVL3_14B": "https://huggingface.co/OpenGVLab/InternVL3-14B",
|
| 262 |
"InternVL3_38B": "https://huggingface.co/OpenGVLab/InternVL3-38B",
|
| 263 |
"InternVL3_78B": "https://huggingface.co/OpenGVLab/InternVL3-78B",
|
|
|
|
|
|
|
|
|
|
| 264 |
}
|
| 265 |
|
| 266 |
# Define the base MODEL_GROUPS structure
|
|
|
|
| 261 |
"InternVL3_14B": "https://huggingface.co/OpenGVLab/InternVL3-14B",
|
| 262 |
"InternVL3_38B": "https://huggingface.co/OpenGVLab/InternVL3-38B",
|
| 263 |
"InternVL3_78B": "https://huggingface.co/OpenGVLab/InternVL3-78B",
|
| 264 |
+
"GPT-o1": "https://openai.com/o1/",
|
| 265 |
+
"GPT-o1-mini": "https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/",
|
| 266 |
+
"Seed1.5-VL": "https://github.com/ByteDance-Seed/Seed1.5-VL",
|
| 267 |
}
|
| 268 |
|
| 269 |
# Define the base MODEL_GROUPS structure
|
static/eval_results/Default/self_reported.json
CHANGED
|
@@ -1,8 +1,25 @@
|
|
| 1 |
{
|
| 2 |
-
"MiniMax-VL-01":
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
"Qwen2.5-VL-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"MiniMax-VL-01": {
|
| 3 |
+
"overall": 47.4
|
| 4 |
+
},
|
| 5 |
+
"Qwen2.5-VL-72B": {
|
| 6 |
+
"overall": 51.3
|
| 7 |
+
},
|
| 8 |
+
"Qwen2.5-VL-7B": {
|
| 9 |
+
"overall": 36.8
|
| 10 |
+
},
|
| 11 |
+
"Qwen2.5-VL-3B": {
|
| 12 |
+
"overall": 28.9
|
| 13 |
+
},
|
| 14 |
+
"GPT-o1": {
|
| 15 |
+
"overall": 58.0
|
| 16 |
+
},
|
| 17 |
+
"GPT-o1-mini": {
|
| 18 |
+
"overall": 54.2
|
| 19 |
+
},
|
| 20 |
+
"Seed1.5-VL": {
|
| 21 |
+
"overall": 59.85,
|
| 22 |
+
"core": 58.58,
|
| 23 |
+
"open": 68.46
|
| 24 |
+
}
|
| 25 |
}
|
utils.py
CHANGED
|
@@ -106,11 +106,20 @@ class MEGABenchEvalDataLoader:
|
|
| 106 |
|
| 107 |
# Add asterisk for self-reported results
|
| 108 |
if model in self.SELF_REPORTED:
|
|
|
|
| 109 |
# Store numeric value for sorting but display with asterisk
|
| 110 |
-
row["Overall"] =
|
| 111 |
-
row["Overall_display"] = f"{
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
for display_name in self.SUPER_GROUPS[selected_super_group]:
|
| 115 |
row[display_name] = None
|
| 116 |
else:
|
|
|
|
| 106 |
|
| 107 |
# Add asterisk for self-reported results
|
| 108 |
if model in self.SELF_REPORTED:
|
| 109 |
+
model_scores = self.SELF_REPORTED[model]
|
| 110 |
# Store numeric value for sorting but display with asterisk
|
| 111 |
+
row["Overall"] = model_scores["overall"]
|
| 112 |
+
row["Overall_display"] = f"{model_scores['overall']:.2f}*"
|
| 113 |
+
# Handle optional core and open scores
|
| 114 |
+
core_score = model_scores.get("core")
|
| 115 |
+
open_score = model_scores.get("open")
|
| 116 |
+
row["Core"] = core_score
|
| 117 |
+
row["Open-ended"] = open_score
|
| 118 |
+
# Add asterisk to core and open scores if they exist
|
| 119 |
+
if core_score is not None:
|
| 120 |
+
row["Core"] = f"{core_score:.2f}*"
|
| 121 |
+
if open_score is not None:
|
| 122 |
+
row["Open-ended"] = f"{open_score:.2f}*"
|
| 123 |
for display_name in self.SUPER_GROUPS[selected_super_group]:
|
| 124 |
row[display_name] = None
|
| 125 |
else:
|