Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
b774671
1
Parent(s):
03af4bc
Add multithread
Browse files
app.py
CHANGED
|
@@ -8,6 +8,8 @@ from huggingface_hub import HfApi, hf_hub_download, snapshot_download
|
|
| 8 |
from huggingface_hub.repocard import metadata_load
|
| 9 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 10 |
|
|
|
|
|
|
|
| 11 |
from utils import *
|
| 12 |
|
| 13 |
DATASET_REPO_URL = "https://huggingface.co/datasets/huggingface-projects/drlc-leaderboard-data"
|
|
@@ -196,6 +198,42 @@ def get_model_ids(rl_env):
|
|
| 196 |
model_ids = [x.modelId for x in models]
|
| 197 |
return model_ids
|
| 198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
def update_leaderboard_dataset(rl_env, path):
|
| 200 |
# Get model ids associated with rl_env
|
| 201 |
model_ids = get_model_ids(rl_env)
|
|
@@ -272,7 +310,7 @@ def run_update_dataset():
|
|
| 272 |
path_ = download_leaderboard_dataset()
|
| 273 |
for i in range(0, len(rl_envs)):
|
| 274 |
rl_env = rl_envs[i]
|
| 275 |
-
|
| 276 |
|
| 277 |
api.upload_folder(
|
| 278 |
folder_path=path_,
|
|
|
|
| 8 |
from huggingface_hub.repocard import metadata_load
|
| 9 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 10 |
|
| 11 |
+
from tqdm.contrib.concurrent import thread_map
|
| 12 |
+
|
| 13 |
from utils import *
|
| 14 |
|
| 15 |
DATASET_REPO_URL = "https://huggingface.co/datasets/huggingface-projects/drlc-leaderboard-data"
|
|
|
|
| 198 |
model_ids = [x.modelId for x in models]
|
| 199 |
return model_ids
|
| 200 |
|
| 201 |
+
# Parallelized version
|
| 202 |
+
def update_leaderboard_dataset_parallel(rl_env, path):
    """Rebuild the leaderboard CSV for one environment using a thread pool.

    Fetches the model ids for ``rl_env``, loads each model's metadata
    concurrently, turns every model that has metadata into a leaderboard
    row, ranks the rows and writes them to ``<path>/<rl_env>.csv``.

    Args:
        rl_env: Environment identifier; also used as the CSV file stem.
        path: Directory in which the CSV file is written.

    Returns:
        The ranked dataframe that was written to disk.
    """
    # Get model ids associated with rl_env
    model_ids = get_model_ids(rl_env)

    def process_model(model_id):
        """Build the leaderboard row for ``model_id``, or None if it has no metadata."""
        meta = get_metadata(model_id)
        if meta is None:
            return None

        accuracy = parse_metrics_accuracy(meta)
        mean_reward, std_reward = parse_rewards(accuracy)
        # Missing (NaN) values count as 0 so "Results" stays numeric.
        mean_reward = mean_reward if not pd.isna(mean_reward) else 0
        std_reward = std_reward if not pd.isna(std_reward) else 0

        return {
            # The user is the part of the model id before the first "/".
            "User": model_id.split('/')[0],
            "Model": model_id,
            "Results": mean_reward - std_reward,
            "Mean Reward": mean_reward,
            "Std Reward": std_reward,
        }

    # thread_map already returns a list (in input order), so no list() wrapper.
    rows = thread_map(process_model, model_ids, desc="Processing models")

    # Filter out None results (models with no metadata)
    data = [row for row in rows if row is not None]

    # NOTE(review): if no model has metadata, `data` is empty and the frame has
    # no columns -- confirm rank_dataframe copes with that.
    ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
    ranked_dataframe.to_csv(f"{path}/{rl_env}.csv", index=False)

    return ranked_dataframe
|
| 235 |
+
|
| 236 |
+
|
| 237 |
def update_leaderboard_dataset(rl_env, path):
|
| 238 |
# Get model ids associated with rl_env
|
| 239 |
model_ids = get_model_ids(rl_env)
|
|
|
|
| 310 |
path_ = download_leaderboard_dataset()
|
| 311 |
for i in range(0, len(rl_envs)):
|
| 312 |
rl_env = rl_envs[i]
|
| 313 |
+
update_leaderboard_dataset_parallel(rl_env["rl_env"], path_)
|
| 314 |
|
| 315 |
api.upload_folder(
|
| 316 |
folder_path=path_,
|