Commit
·
29be9e3
1
Parent(s):
04622fa
cache the model downloading
Browse files- refresh.py +16 -8
refresh.py
CHANGED
|
@@ -14,7 +14,7 @@ from utils.model_size import get_model_parameters_memory
|
|
| 14 |
from envs import LEADERBOARD_CONFIG, MODEL_META, REPO_ID, RESULTS_REPO, API
|
| 15 |
|
| 16 |
|
| 17 |
-
|
| 18 |
TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"]
|
| 19 |
BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
|
| 20 |
|
|
@@ -187,6 +187,20 @@ def get_external_model_results():
|
|
| 187 |
return EXTERNAL_MODEL_RESULTS
|
| 188 |
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
|
| 191 |
global MODEL_INFOS
|
| 192 |
|
|
@@ -230,16 +244,10 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 230 |
for model in pbar:
|
| 231 |
if model.modelId in MODELS_TO_SKIP: continue
|
| 232 |
pbar.set_description(f"Fetching {model.modelId!r} metadata")
|
| 233 |
-
|
| 234 |
-
readme_path = hf_hub_download(model.modelId, filename="README.md", etag_timeout=30)
|
| 235 |
-
except Exception:
|
| 236 |
-
print(f"ERROR: Could not fetch metadata for {model.modelId}, trying again")
|
| 237 |
-
readme_path = hf_hub_download(model.modelId, filename="README.md", etag_timeout=30)
|
| 238 |
-
meta = metadata_load(readme_path)
|
| 239 |
MODEL_INFOS[model.modelId] = {
|
| 240 |
"metadata": meta
|
| 241 |
}
|
| 242 |
-
meta = MODEL_INFOS[model.modelId]["metadata"]
|
| 243 |
if "model-index" not in meta:
|
| 244 |
continue
|
| 245 |
# meta['model-index'][0]["results"] is list of elements like:
|
|
|
|
| 14 |
from envs import LEADERBOARD_CONFIG, MODEL_META, REPO_ID, RESULTS_REPO, API
|
| 15 |
|
| 16 |
|
| 17 |
+
MODEL_CACHE = {}
|
| 18 |
TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"]
|
| 19 |
BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
|
| 20 |
|
|
|
|
| 187 |
return EXTERNAL_MODEL_RESULTS
|
| 188 |
|
| 189 |
|
| 190 |
+
def download_or_use_cache(modelId):
    """Return the parsed README metadata for ``modelId``, memoized per model.

    The module-level ``MODEL_CACHE`` dict is consulted first. On a miss, the
    model's ``README.md`` is fetched via ``hf_hub_download`` and parsed with
    ``metadata_load``; the parsed result is stored in ``MODEL_CACHE`` under
    ``modelId`` before being returned.

    The download is attempted at most twice: if the first attempt raises any
    ``Exception``, an error line is printed and the call is repeated once.
    An exception raised by the second attempt (or by ``metadata_load``)
    propagates to the caller and nothing is cached.
    """
    if modelId not in MODEL_CACHE:
        # Both attempts use exactly the same download arguments.
        fetch_kwargs = {"filename": "README.md", "etag_timeout": 30}
        try:
            local_readme = hf_hub_download(modelId, **fetch_kwargs)
        except Exception:
            # Single blind retry; a second failure is allowed to bubble up.
            print(f"ERROR: Could not fetch metadata for {modelId}, trying again")
            local_readme = hf_hub_download(modelId, **fetch_kwargs)
        # MODEL_CACHE is only mutated (never rebound), so no `global` is needed.
        MODEL_CACHE[modelId] = metadata_load(local_readme)
    return MODEL_CACHE[modelId]
|
| 202 |
+
|
| 203 |
+
|
| 204 |
def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
|
| 205 |
global MODEL_INFOS
|
| 206 |
|
|
|
|
| 244 |
for model in pbar:
|
| 245 |
if model.modelId in MODELS_TO_SKIP: continue
|
| 246 |
pbar.set_description(f"Fetching {model.modelId!r} metadata")
|
| 247 |
+
meta = download_or_use_cache(model.modelId)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
MODEL_INFOS[model.modelId] = {
|
| 249 |
"metadata": meta
|
| 250 |
}
|
|
|
|
| 251 |
if "model-index" not in meta:
|
| 252 |
continue
|
| 253 |
# meta['model-index'][0]["results"] is list of elements like:
|