import os
from pathlib import Path

from huggingface_hub import HfApi, Repository

from constants import EVAL_REQUESTS_PATH

TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
QUEUE_REPO_MULTI = os.environ.get("QUEUE_REPO_MULTI", None)
QUEUE_REPO_LONGFORM = "Steveeeeeeen/leaderboard_longform"
QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
QUEUE_PATH_MULTI = os.environ.get("QUEUE_PATH_MULTI", None)
QUEUE_PATH_LONGFORM = "Steveeeeeeen/leaderboard_longform"

hf_api = HfApi(
    endpoint="https://huggingface.co",
    token=TOKEN_HUB,
)

def load_all_info_from_dataset_hub():
    """Clone the evaluation queue dataset repo and collect requests and result CSVs."""
    eval_queue_repo = None
    requested_models = None
    csv_results = None

    passed = True
    if TOKEN_HUB is None:
        passed = False
    else:
        print("Pulling evaluation requests and results.")

        eval_queue_repo = Repository(
            local_dir=QUEUE_PATH,
            clone_from=QUEUE_REPO,
            use_auth_token=TOKEN_HUB,
            repo_type="dataset",
        )
        eval_queue_repo.git_pull()

        # Local directory where the dataset repo is cloned + folder with eval requests
        directory = Path(QUEUE_PATH) / EVAL_REQUESTS_PATH
        requested_models = get_all_requested_models(directory)
        requested_models = [p.stem for p in requested_models]

        # Local directory where the dataset repo is cloned
        csv_results = get_csv_with_results(QUEUE_PATH)
        if csv_results is None:
            passed = False

    if not passed:
        raise ValueError(
            "No Hugging Face token provided or no results CSV found. "
            "Skipping evaluation requests and results."
        )

    # Load multilingual and longform data in the same way
    multilingual_csv_results = load_multilingual_data()
    longform_csv_results = load_longform_data()

    return eval_queue_repo, requested_models, csv_results, multilingual_csv_results, longform_csv_results

def load_multilingual_data():
    """Load multilingual evaluation data from CSV."""
    multilingual_queue_path = QUEUE_PATH_MULTI
    try:
        # Try the dedicated multilingual HF repo first
        if TOKEN_HUB is not None:
            print("Pulling multilingual evaluation data.")
            try:
                multilingual_repo = Repository(
                    local_dir=multilingual_queue_path,
                    clone_from=QUEUE_REPO_MULTI,
                    use_auth_token=TOKEN_HUB,
                    repo_type="dataset",
                )
                multilingual_repo.git_pull()
                multilingual_csv = get_csv_with_results(multilingual_queue_path)
            except Exception as e:
                print(f"Failed to pull from multilingual repo: {e}")
                multilingual_csv = None
        else:
            multilingual_csv = None

        # Fall back to a local CSV file
        if multilingual_csv is None:
            print("Using local multilingual CSV file.")
            multilingual_csv = get_csv_with_results(".")

        return multilingual_csv
    except Exception as e:
        print(f"Error loading multilingual data: {e}")
        return None

def load_longform_data():
    """Load longform evaluation data from CSV."""
    longform_queue_path = QUEUE_PATH_LONGFORM
    try:
        # Try the dedicated longform HF repo first
        if TOKEN_HUB is not None:
            print("Pulling longform evaluation data.")
            try:
                longform_repo = Repository(
                    local_dir=longform_queue_path,
                    clone_from=QUEUE_REPO_LONGFORM,
                    use_auth_token=TOKEN_HUB,
                    repo_type="dataset",
                )
                longform_repo.git_pull()
                longform_csv = get_csv_with_results(longform_queue_path)
            except Exception as e:
                print(f"Failed to pull from longform repo: {e}")
                longform_csv = None
        else:
            longform_csv = None

        # Fall back to a local CSV file
        if longform_csv is None:
            print("Using local longform CSV file.")
            longform_csv = get_csv_with_results(".")

        return longform_csv
    except Exception as e:
        print(f"Error loading longform data: {e}")
        return None

def upload_file(requested_model_name, path_or_fileobj):
    dest_repo_file = Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name
    hf_api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=str(dest_repo_file),
        repo_id=QUEUE_REPO,
        token=TOKEN_HUB,
        repo_type="dataset",
        commit_message=f"Add {requested_model_name} to eval queue",
    )

def get_all_requested_models(directory):
    directory = Path(directory)
    all_requested_models = list(directory.glob("*.txt"))
    return all_requested_models

def get_csv_with_results(directory):
    """Return the single CSV in `directory` whose stem ends with "latest", or None."""
    directory = Path(directory)
    all_csv_files = list(directory.glob("*.csv"))
    latest = [f for f in all_csv_files if f.stem.endswith("latest")]
    if len(latest) != 1:
        return None
    return latest[0]

def is_model_on_hub(model_name, revision="main") -> tuple:
    """Return (True, None) if the model exists on the Hub, else (False, error message suffix)."""
    try:
        model_name = model_name.replace(" ", "")
        author = model_name.split("/")[0]
        model_id = model_name.split("/")[1]
        if len(author) == 0 or len(model_id) == 0:
            return False, "is not a valid model name. Please use the format `author/model_name`."
    except Exception:
        return False, "is not a valid model name. Please use the format `author/model_name`."

    try:
        models = list(hf_api.list_models(author=author, search=model_id))
        matched = [m for m in models if m.modelId == model_name]
        if len(matched) != 1:
            return False, "was not found on the hub!"
        else:
            return True, None
    except Exception as e:
        print(f"Could not get the model from the hub: {e}")
        return False, "was not found on the hub!"
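

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how an app entry point (e.g. the Space's app.py) might
# call these helpers. The __main__ guard and the example model id below are
# assumptions for illustration only; they are not defined elsewhere in this file.
if __name__ == "__main__":
    # Pull the queue repo plus the multilingual and longform result CSVs
    # (raises ValueError if TOKEN_HUB is missing or no "*latest.csv" is found).
    eval_queue_repo, requested_models, csv_results, multi_csv, longform_csv = (
        load_all_info_from_dataset_hub()
    )
    print(f"Found {len(requested_models)} requested models; results CSV: {csv_results}")

    # Validate a user-submitted model id before adding it to the eval queue.
    ok, error_suffix = is_model_on_hub("openai/whisper-large-v3")
    if not ok:
        print(f"openai/whisper-large-v3 {error_suffix}")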