Joschka Strueber committed · Commit 5d4059c · 1 Parent(s): 238bffb

[Add] filter gated models

Files changed:
- app.py +5 -3
- src/dataloading.py +13 -7
app.py CHANGED

@@ -87,7 +87,7 @@ def update_datasets_based_on_models(selected_models, current_dataset):
         return gr.update(choices=[], value=None)
 
 links_markdown = """
-[📄 Paper](https://arxiv.org/
+[📄 Paper](https://arxiv.org/abs/6181841) |
 [☯ Homepage](https://model-similarity.github.io/) |
 [🐱 Code](https://github.com/model-similarity/lm-similarity) |
 [🐍 pip install lm-sim](https://pypi.org/project/lm-sim/) |
@@ -103,6 +103,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     dataset_dropdown = gr.Dropdown(
         choices=get_leaderboard_datasets(None),
         label="Select Dataset",
+        value="mmlu_pro",
         filterable=True,
         interactive=True,
         allow_custom_value=False,
@@ -117,6 +118,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     model_dropdown = gr.Dropdown(
         choices=get_leaderboard_models_cached(),
         label="Select Models",
+        value=["Qwen/Qwen2.5-"],
         multiselect=True,
         filterable=True,
         allow_custom_value=False,
@@ -154,8 +156,8 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     - **Datasets**: [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/) benchmark datasets \n
       - Some datasets are not multiple-choice - for these, the metrics are not applicable. \n
     - **Models**: Open LLM Leaderboard models \n
-      - Every model is gated on Hugging Face and access has to be requested. \n
-      - We requested access
+      - Every model evaluation is gated on Hugging Face and access has to be requested. \n
+      - We requested access for the most popular models, but some may be missing. \n
     - **Metrics**: Kappa_p (probabilistic), Kappa_p (deterministic), Error Consistency""")
 
 if __name__ == "__main__":
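For context on the two `value=` additions above: in Gradio, `value` sets a dropdown's initial selection, and a `multiselect` dropdown takes a list. The commit's default `["Qwen/Qwen2.5-"]` appears truncated in this view and is left as-is; below is a minimal self-contained sketch of the same pattern, with placeholder choices rather than the app's real leaderboard data.

import gradio as gr

# Minimal sketch of Dropdown defaults; every choice string here is a
# placeholder, not the Space's actual dataset or model list.
with gr.Blocks(title="Dropdown defaults demo") as demo:
    # Single-select: `value` names one of the choices.
    dataset_dropdown = gr.Dropdown(
        choices=["mmlu_pro", "bbh", "gpqa"],
        value="mmlu_pro",
        label="Select Dataset",
        filterable=True,
        interactive=True,
        allow_custom_value=False,
    )
    # Multiselect: `value` is a list of preselected choices.
    model_dropdown = gr.Dropdown(
        choices=["org-a/model-1", "org-b/model-2"],
        value=["org-a/model-1"],
        label="Select Models",
        multiselect=True,
        filterable=True,
        allow_custom_value=False,
    )

if __name__ == "__main__":
    demo.launch()

Note that with `allow_custom_value=False`, recent Gradio versions complain when a default is not among `choices`, so a hard-coded default like the commit's relies on the loaded model list actually containing it.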
src/dataloading.py CHANGED

@@ -14,13 +14,19 @@ def get_leaderboard_models():
     models = []
     for dataset in datasets:
         if dataset.id.endswith("-details"):
-            # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
-            model_part = dataset.id.split("/")[-1].replace("-details", "")
-            if "__" in model_part:
-                provider, model = model_part.split("__", 1)
-                models.append(f"{provider}/{model}")
-            else:
-                models.append(model_part)
+            dataset_id = dataset.id
+            try:
+                # Check if the dataset can be loaded
+                check_gated = datasets.get_dataset_config_names(dataset_id)
+                # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
+                model_part = dataset.id.split("/")[-1].replace("-details", "")
+                if "__" in model_part:
+                    provider, model = model_part.split("__", 1)
+                    models.append(f"{provider}/{model}")
+                else:
+                    models.append(model_part)
+            except Exception as e:
+                pass
 
     return sorted(models)
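The filtering idea in this hunk, restated standalone: probe each `*-details` dataset with `get_dataset_config_names` and skip any repo that raises, since gated datasets load only with granted access. The sketch below is a hedged reconstruction, not the Space's exact code: the `list_datasets` call, the `open-llm-leaderboard` author filter, and all local names are assumptions, and the function is imported directly from the `datasets` package because the diff calls `datasets.get_dataset_config_names(...)` on the same name it iterates over.

from huggingface_hub import HfApi
from datasets import get_dataset_config_names

# Standalone sketch of the gated-model filter; function and variable
# names are illustrative, not necessarily those used in the Space.
def get_accessible_leaderboard_models() -> list[str]:
    api = HfApi()
    # Detail repos follow "open-llm-leaderboard/<provider>__<model_name>-details"
    leaderboard_datasets = api.list_datasets(author="open-llm-leaderboard")
    models = []
    for ds in leaderboard_datasets:
        if not ds.id.endswith("-details"):
            continue
        try:
            # Gated or otherwise inaccessible datasets raise here, so
            # models whose results we cannot load are skipped silently.
            get_dataset_config_names(ds.id)
        except Exception:
            continue
        model_part = ds.id.split("/")[-1].replace("-details", "")
        if "__" in model_part:
            provider, model = model_part.split("__", 1)
            models.append(f"{provider}/{model}")
        else:
            models.append(model_part)
    return sorted(models)

The probe costs one Hub round-trip per dataset, so the listing slows down as the leaderboard grows; the app already wraps this in a `get_leaderboard_models_cached()` call, which keeps the repeated network checks off the hot path.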