Add notebooks to flush prediction repos
Files changed:
- app.py (+25 -3)
- notebooks/flush-prediction-repos.ipynb (+177 -0)
app.py
CHANGED

@@ -4,12 +4,14 @@ from pathlib import Path
 
 import pandas as pd
 import streamlit as st
+import yaml
 from datasets import get_dataset_config_names
 from dotenv import load_dotenv
 from huggingface_hub import list_datasets
 
 from evaluation import filter_evaluated_models
 from utils import (
+    AUTOTRAIN_TASK_TO_HUB_TASK,
     commit_evaluation_log,
     format_col_mapping,
     get_compatible_models,

@@ -146,9 +148,8 @@ selected_dataset = st.selectbox(
     "Select a dataset",
     all_datasets,
     index=all_datasets.index(default_dataset),
-    help="""Datasets with metadata can be evaluated with 1-click.
-
-    evaluation metadata to a dataset.""",
+    help="""Datasets with metadata can be evaluated with 1-click. Configure an evaluation job to add \
+    new metadata to a dataset card.""",
 )
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 

@@ -495,6 +496,18 @@ with st.form(key="form"):
             ).json()
             print(f"INFO -- AutoTrain job response: {train_json_resp}")
             if train_json_resp["success"]:
+                train_eval_index = {
+                    "train-eval-index": [
+                        {
+                            "config": selected_config,
+                            "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
+                            "task_id": selected_task,
+                            "splits": {"eval_split": selected_split},
+                            "col_mapping": col_mapping,
+                        }
+                    ]
+                }
+                selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
                 st.success("✅ Successfully submitted evaluation job!")
                 st.markdown(
                     f"""

@@ -506,6 +519,15 @@ with st.form(key="form"):
                     Check your email for notifications.
                     * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
                         to view the results from your submission once the Hub pull request is merged.
+                    * Add the following metadata to the \
+                        [dataset card](https://huggingface.co/datasets/{selected_dataset}/blob/main/README.md) \
+                        to enable 1-click evaluations:
+                    """
+                )
+                st.markdown(
+                    f"""
+                    ```yaml
+                    {selected_metadata}
                     """
                 )
                 print("INFO -- Pushing evaluation job logs to the Hub")
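The block added to `app.py` builds a `train-eval-index` entry and serializes it with `yaml.dump` so the success message can show users the exact metadata to paste into the dataset card. The sketch below shows roughly what that output looks like; the config, task, split, and column-mapping values are hypothetical placeholders (the real app fills them from the Streamlit widget state and maps the task via `AUTOTRAIN_TASK_TO_HUB_TASK`), and it assumes PyYAML is available, since `app.py` now imports `yaml`.

```python
import yaml

# Hypothetical values standing in for the Streamlit selections in app.py;
# the real app derives "task" via AUTOTRAIN_TASK_TO_HUB_TASK[selected_task].
train_eval_index = {
    "train-eval-index": [
        {
            "config": "default",                                  # placeholder dataset config
            "task": "text-classification",                        # placeholder Hub task
            "task_id": "text_multi_class_classification",         # placeholder AutoTrain task id
            "splits": {"eval_split": "test"},                     # placeholder split
            "col_mapping": {"text": "text", "label": "target"},   # placeholder column mapping
        }
    ]
}

# Mirrors the call added in this commit: render the metadata as YAML.
print(yaml.dump(train_eval_index, sort_keys=False))
```

With `sort_keys=False`, keys are emitted in insertion order, so the rendered YAML mirrors the structure of the dict shown above and can be pasted into the dataset card as-is.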
notebooks/flush-prediction-repos.ipynb
ADDED
@@ -0,0 +1,177 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c8093b9e-ca6a-423d-96c3-5fe21f7109a1",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "efe8cda7-a687-4867-b1f0-8efbcd428681",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "from huggingface_hub import DatasetFilter, delete_repo, list_datasets\n",
    "from tqdm.auto import tqdm\n",
    "\n",
    "if Path(\".env\").is_file():\n",
    "    load_dotenv(\".env\")\n",
    "\n",
    "HF_TOKEN = os.getenv(\"HF_TOKEN\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8f6e01f0-b658-451f-999c-e08d9f4bbbd3",
   "metadata": {},
   "source": [
    "## Get all prediction repos from autoevaluate org"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2e369478-66d3-498d-a8fd-95bc9180f362",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_prediction_repos():\n",
    "    all_repos = list_datasets(author=\"autoevaluate\")\n",
    "    prediction_repos = [\n",
    "        repo for repo in all_repos if repo.id.split(\"/\")[1].startswith(\"autoeval-\")\n",
    "    ]\n",
    "    return prediction_repos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "542db019-d01f-42f5-bef4-888dae8eeadb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "66"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_repos = get_prediction_repos()\n",
    "len(prediction_repos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "331cfabf-4b73-490f-8d6a-86b5bc162666",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetInfo: {\n",
       "\tid: autoevaluate/autoeval-staging-eval-project-9dcc51b5-6464670\n",
       "\tsha: d3bb02be592d167f7a217ac9341d187142d9a90a\n",
       "\tlastModified: 2022-06-13T14:54:34.000Z\n",
       "\ttags: ['type:predictions', 'tags:autotrain', 'tags:evaluation', 'datasets:glue']\n",
       "\tprivate: False\n",
       "\tauthor: autoevaluate\n",
       "\tdescription: None\n",
       "\tcitation: None\n",
       "\tcardData: None\n",
       "\tsiblings: None\n",
       "\tgated: False\n",
       "\tdownloads: 12\n",
       "}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_repos[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57a86b69-ffe8-4035-8f3d-5c917d8ce7bf",
   "metadata": {},
   "source": [
    "## Delete all prediction repos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6c8e23e7-2a6d-437b-9742-17f37684d9eb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "06fa304dcc6d44e39205b20a5e488052",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/66 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "for repo in tqdm(prediction_repos):\n",
    "    delete_repo(\n",
    "        repo_id=repo.id,\n",
    "        repo_type=\"dataset\",\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d64b0aa-d05f-4497-9bd2-eb2fc0d8bd7a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "autoevaluate",
   "language": "python",
   "name": "autoevaluate"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
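As a usage note, the same flush can be run outside Jupyter. The sketch below is a minimal script version of the notebook's logic under stated assumptions: the `--delete` flag, the dry-run default, and passing `token=HF_TOKEN` explicitly to `delete_repo` are illustrative additions, not part of the committed notebook.

```python
import argparse
import os

from dotenv import load_dotenv
from huggingface_hub import delete_repo, list_datasets


def get_prediction_repos():
    """Same filter as the notebook: autoevaluate datasets whose name starts with autoeval-."""
    all_repos = list_datasets(author="autoevaluate")
    return [repo for repo in all_repos if repo.id.split("/")[1].startswith("autoeval-")]


def main():
    parser = argparse.ArgumentParser(description="Flush autoevaluate prediction repos")
    parser.add_argument("--delete", action="store_true",
                        help="actually delete the repos; default is a dry run")
    args = parser.parse_args()

    load_dotenv()  # picks up HF_TOKEN from a local .env, as in the notebook
    token = os.getenv("HF_TOKEN")

    repos = get_prediction_repos()
    print(f"Found {len(repos)} prediction repos")
    for repo in repos:
        if args.delete:
            delete_repo(repo_id=repo.id, repo_type="dataset", token=token)
            print(f"Deleted {repo.id}")
        else:
            print(f"[dry run] would delete {repo.id}")


if __name__ == "__main__":
    main()
```

The dry-run default makes it easy to confirm which `autoeval-*` repos would be removed before deleting anything.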