future-xy
committed on
Commit
·
82a6ed1
1
Parent(s):
a4a186c
fix result display bug
Browse files
- src/backend/envs.py +1 -1
- src/display/utils.py +16 -18
src/backend/envs.py
CHANGED
|
@@ -43,7 +43,7 @@ class Tasks(Enum):
|
|
| 43 |
|
| 44 |
# task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
|
| 45 |
|
| 46 |
-
task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
|
| 47 |
|
| 48 |
# task15 = Task("fever10", "acc", "FEVER", 16)
|
| 49 |
# task15_1 = Task("fever11", "acc", "FEVER", 8)
|
|
|
|
| 43 |
|
| 44 |
# task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
|
| 45 |
|
| 46 |
+
# task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
|
| 47 |
|
| 48 |
# task15 = Task("fever10", "acc", "FEVER", 16)
|
| 49 |
# task15_1 = Task("fever11", "acc", "FEVER", 8)
|
src/display/utils.py
CHANGED
|
@@ -45,8 +45,8 @@ class Tasks(Enum):
|
|
| 45 |
# halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
| 46 |
|
| 47 |
# # XXX include me back at some point
|
| 48 |
-
selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
| 49 |
-
mmlu = Task("
|
| 50 |
|
| 51 |
|
| 52 |
# These classes are for user facing column names,
|
|
@@ -63,11 +63,9 @@ class ColumnContent:
|
|
| 63 |
|
| 64 |
|
| 65 |
auto_eval_column_dict = []
|
| 66 |
-
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "str", True, never_hidden=True)])
|
| 67 |
-
auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware", "str", True, never_hidden=True)])
|
| 68 |
# Init
|
| 69 |
-
|
| 70 |
-
|
| 71 |
|
| 72 |
# #Scores
|
| 73 |
# # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
|
@@ -75,18 +73,18 @@ auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware
|
|
| 75 |
for task in Tasks:
|
| 76 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
| 77 |
|
| 78 |
-
#
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
#
|
| 89 |
-
|
| 90 |
|
| 91 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 92 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
|
| 45 |
# halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
| 46 |
|
| 47 |
# # XXX include me back at some point
|
| 48 |
+
# selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
| 49 |
+
mmlu = Task("mmlu", "acc", "MMLU/Acc")
|
| 50 |
|
| 51 |
|
| 52 |
# These classes are for user facing column names,
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
auto_eval_column_dict = []
|
|
|
|
|
|
|
| 66 |
# Init
|
| 67 |
+
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
|
| 68 |
+
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
| 69 |
|
| 70 |
# #Scores
|
| 71 |
# # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
|
|
|
| 73 |
for task in Tasks:
|
| 74 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
| 75 |
|
| 76 |
+
# Model information
|
| 77 |
+
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
| 78 |
+
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
| 79 |
+
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
| 80 |
+
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
| 81 |
+
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
| 82 |
+
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
| 83 |
+
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
| 84 |
+
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
| 85 |
+
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 86 |
+
# Dummy column for the search bar (hidden by the custom CSS)
|
| 87 |
+
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
| 88 |
|
| 89 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 90 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|