| import pandas as pd | |
| from huggingface_hub import snapshot_download | |
def my_snapshot_download(
    repo_id, revision, local_dir, repo_type, max_workers, *, max_attempts=10, retry_delay=60
):
    """Download a repository snapshot, retrying when the download fails.

    ``snapshot_download`` is called up to ``max_attempts`` times. Any
    exception it raises is reported on stdout; while attempts remain the
    function waits ``retry_delay`` seconds and tries again. This is a
    best-effort helper: it never re-raises and returns ``None`` whether or
    not a download eventually succeeded.

    Args:
        repo_id: Forwarded to ``snapshot_download`` as ``repo_id``.
        revision: Forwarded to ``snapshot_download`` as ``revision``.
        local_dir: Forwarded to ``snapshot_download`` as ``local_dir``.
        repo_type: Forwarded to ``snapshot_download`` as ``repo_type``.
        max_workers: Forwarded to ``snapshot_download`` as ``max_workers``.
        max_attempts: Maximum number of download attempts (default 10).
        retry_delay: Seconds to wait between attempts (default 60).

    Returns:
        None.
    """
    # Function-scoped import, as in the original block, so the module's
    # top-level import list does not need to change.
    import time

    for attempt in range(1, max_attempts + 1):
        try:
            snapshot_download(
                repo_id=repo_id,
                revision=revision,
                local_dir=local_dir,
                repo_type=repo_type,
                max_workers=max_workers,
            )
        except Exception as e:  # deliberately broad: any failure triggers a retry
            if attempt == max_attempts:
                # No retry follows the last attempt, so do not claim one and
                # do not make the caller wait out a pointless delay.
                print(
                    f"Failed to download {repo_id} at {revision} with error: {e}. "
                    f"Giving up after {max_attempts} attempts."
                )
                return
            print(f"Failed to download {repo_id} at {revision} with error: {e}. Retrying...")
            time.sleep(retry_delay)
        else:
            return
def get_dataset_url(row):
    """Return an HTML anchor that links a benchmark name to its dataset.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the
            ``"Benchmark"`` and ``"Dataset Link"`` entries.

    Returns:
        A string holding an ``<a>`` element that opens ``row["Dataset Link"]``
        in a new tab and shows ``row["Benchmark"]`` as its text.
    """
    # Function-scoped import so the module's top-level imports stay untouched.
    import html

    # Escape both values: an unescaped '&', '<' or '"' in the data would
    # otherwise break out of the attribute or the element text. str() keeps
    # non-string cells (e.g. NaN) rendering the way the f-string used to.
    dataset_name = html.escape(str(row["Benchmark"]), quote=True)
    dataset_url = html.escape(str(row["Dataset Link"]), quote=True)
    benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
    return benchmark
def get_dataset_summary_table(file_path):
    """Load the dataset summary CSV and return it with linked benchmark names.

    The CSV at ``file_path`` is read with pandas, each row's ``"Benchmark"``
    value is replaced by the result of ``get_dataset_url`` for that row, and
    only the display columns are kept.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV.

    Returns:
        A DataFrame with the columns ``Category``, ``Benchmark``,
        ``Data Split``, ``Data Size`` and ``Language``, in that order.
    """
    display_columns = ["Category", "Benchmark", "Data Split", "Data Size", "Language"]
    summary = pd.read_csv(file_path)
    # Row-wise apply: every row is handed to get_dataset_url as-is.
    summary["Benchmark"] = summary.apply(get_dataset_url, axis=1)
    return summary[display_columns]