Joschka Strueber committed
Commit: e64ca4e
Parent(s): 5815cf9
[Add] cache loading data from hf
- src/dataloading.py +6 -5
- src/similarity.py +2 -2
src/dataloading.py
CHANGED
@@ -3,6 +3,7 @@ import numpy as np
 from huggingface_hub import HfApi
 
 from functools import lru_cache
+from datasets.exceptions import DatasetNotFoundError
 
 
 def get_leaderboard_models():
@@ -17,9 +18,7 @@ def get_leaderboard_models():
         dataset_id = dataset.id
         try:
             # Check if the dataset can be loaded
-            print(dataset_id)
             check_gated = datasets.get_dataset_config_names(dataset_id)
-            print(check_gated)
             # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
             model_part = dataset_id.split("/")[-1].replace("-details", "")
             if "__" in model_part:
@@ -27,7 +26,7 @@
                 models.append(f"{provider}/{model}")
             else:
                 models.append(model_part)
-        except
+        except DatasetNotFoundError as e:
             pass
 
     return sorted(models)
@@ -77,6 +76,7 @@ def filter_labels(doc):
     return labels
 
 
+
 def load_run_data(model_name, dataset_name):
     try:
         model_name = model_name.replace("/", "__")
@@ -104,6 +104,7 @@ def load_run_data(model_name, dataset_name):
     return log_probs, labels
 
 
-
-
+@lru_cache(maxsize=8)
+def load_run_data_cached(model_name, dataset_name):
+    return load_run_data(model_name, dataset_name)
 
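The caching added here is a thin functools.lru_cache wrapper around the existing loader. Below is a minimal sketch of that pattern with the Hub download stubbed out; the function names mirror the commit, while the model and dataset strings are purely illustrative.

from functools import lru_cache

def load_run_data(model_name, dataset_name):
    # Stand-in for the real loader in src/dataloading.py, which downloads and
    # parses the per-model details dataset from the Hugging Face Hub.
    print(f"loading {model_name} on {dataset_name} ...")
    return [[-0.1, -2.3]], [0]  # placeholder (log_probs, labels)

@lru_cache(maxsize=8)  # keep the 8 most recently requested (model, dataset) results
def load_run_data_cached(model_name, dataset_name):
    return load_run_data(model_name, dataset_name)

load_run_data_cached("provider/model-a", "mmlu")   # miss: runs the loader
load_run_data_cached("provider/model-a", "mmlu")   # hit: returned from the cache
print(load_run_data_cached.cache_info())           # CacheInfo(hits=1, misses=1, maxsize=8, currsize=1)

Note that lru_cache hands back the same objects on a hit, so callers should treat the cached arrays as read-only, and with maxsize=8 at most eight (model, dataset) pairs stay resident at once. The other change in this file replaces the broad except around get_dataset_config_names with DatasetNotFoundError, so only dataset repos that cannot be found or accessed are skipped silently.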
src/similarity.py
CHANGED
@@ -2,7 +2,7 @@ import numpy as np
 
 from lmsim.metrics import Metrics, CAPA, EC
 
-from src.dataloading import load_run_data
+from src.dataloading import load_run_data_cached
 from src.utils import softmax, one_hot
 
 def load_data_and_compute_similarities(models: list[str], dataset: str, metric_name: str) -> np.array:
@@ -10,7 +10,7 @@ def load_data_and_compute_similarities(models: list[str], dataset: str, metric_name: str) -> np.array:
     probs = []
     gts = []
     for model in models:
-        model_probs, model_gt = load_run_data(model, dataset)
+        model_probs, model_gt = load_run_data_cached(model, dataset)
         probs.append(model_probs)
         gts.append(model_gt)
 
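With the loop now going through load_run_data_cached, recomputing similarities for the same models and dataset (for example after switching metrics) reuses the already downloaded runs instead of fetching them again. A hedged usage sketch follows; the dataset and metric strings are placeholders, not values taken from this repo.

from src.similarity import load_data_and_compute_similarities

models = ["provider/model-a", "provider/model-b"]  # hypothetical model ids

# First call loads each model's run data once (cache misses).
sim_a = load_data_and_compute_similarities(models, "mmlu", "CAPA")

# A second call with a different metric hits the lru_cache, so nothing is re-downloaded.
sim_b = load_data_and_compute_similarities(models, "mmlu", "EC")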