Integrate Omar's feedback
README.md CHANGED
```diff
@@ -4,7 +4,7 @@ emoji: π
 colorFrom: red
 colorTo: red
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.10.0
 app_file: app.py
 ---
 
```
app.py CHANGED
```diff
@@ -59,9 +59,12 @@ SUPPORTED_TASKS = list(TASK_TO_ID.keys())
 
 @st.cache
 def get_supported_metrics():
-    metrics = list_metrics()
+    metrics = [metric.id for metric in list_metrics()]
     supported_metrics = []
     for metric in tqdm(metrics):
+        # TODO: this currently requires all metric dependencies to be installed
+        # in the same environment. Refactor to avoid needing to actually load
+        # the metric.
         try:
             metric_func = load(metric)
         except Exception as e:
@@ -93,14 +96,15 @@ supported_metrics = get_supported_metrics()
 #######
 # APP #
 #######
-st.title("Evaluation
+st.title("Evaluation on the Hub")
 st.markdown(
     """
-    Welcome to Hugging Face's
+    Welcome to Hugging Face's automatic model evaluator! This application allows
     you to evaluate 🤗 Transformers
     [models](https://huggingface.co/models?library=transformers&sort=downloads)
-
-    below. The results of your evaluation will be
+    across a wide variety of datasets on the Hub -- all for free! Please select
+    the dataset and configuration below. The results of your evaluation will be
+    displayed on the [public
     leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
     """
 )
@@ -112,7 +116,12 @@ if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in all_datasets:
         default_dataset = query_params["dataset"][0]
 
-selected_dataset = st.selectbox(
+selected_dataset = st.selectbox(
+    "Select a dataset",
+    all_datasets,
+    index=all_datasets.index(default_dataset),
+    help="Datasets with metadata can be evaluated with 1-click. Check out the [documentation](https://huggingface.co/docs/hub/datasets-cards) to add evaluation metadata to a dataset.",
+)
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
@@ -262,9 +271,10 @@ with st.expander("Advanced configuration"):
         col_mapping[target_col] = "target"
 
     elif selected_task == "extractive_question_answering":
-
-
-
+        if metadata is not None:
+            col_mapping = metadata[0]["col_mapping"]
+            # Hub YAML parser converts periods to hyphens, so we remap them here
+            col_mapping = format_col_mapping(col_mapping)
         with col1:
             st.markdown("`context` column")
             st.text("")
@@ -327,14 +337,18 @@ with st.expander("Advanced configuration"):
         list(set(supported_metrics) - set(TASK_TO_DEFAULT_METRICS[selected_task])),
     )
     st.info(
-        """
+        """Note: user-selected metrics will be run with their default arguments. \
         Check out the [available metrics](https://huggingface.co/metrics) for more details."""
     )
 
 with st.form(key="form"):
 
     compatible_models = get_compatible_models(selected_task, selected_dataset)
-    selected_models = st.multiselect(
+    selected_models = st.multiselect(
+        "Select the models you wish to evaluate",
+        compatible_models,
+        help="Don't see your model in this list? Add the dataset and task it was trained on to the [model card metadata](https://huggingface.co/docs/hub/models-cards#model-card-metadata).",
+    )
     print("Selected models:", selected_models)
 
     if len(selected_models) > 0:
@@ -347,7 +361,7 @@ with st.form(key="form"):
     )
     print("Selected models:", selected_models)
 
-    submit_button = st.form_submit_button("
+    submit_button = st.form_submit_button("Evaluate models")
 
     if submit_button:
         if len(selected_models) > 0:
```
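A note on the `get_supported_metrics` change above: the new TODO flags that metric support is detected by actually loading every metric, which drags each metric's dependencies into the Space's environment. Assembling the hunk's fragments, the function plausibly reads like the sketch below; the import sources (`list_metrics` from `huggingface_hub`, `load` from `evaluate`) and the skip-on-failure handling after the `except` are assumptions, since the diff doesn't show them.

```python
import streamlit as st
from evaluate import load  # assumed source of `load`
from huggingface_hub import list_metrics  # assumed source; entries expose `.id`
from tqdm import tqdm


@st.cache
def get_supported_metrics():
    metrics = [metric.id for metric in list_metrics()]
    supported_metrics = []
    for metric in tqdm(metrics):
        # TODO: this currently requires all metric dependencies to be installed
        # in the same environment. Refactor to avoid needing to actually load
        # the metric.
        try:
            metric_func = load(metric)  # loading is itself the compatibility check
        except Exception as e:
            print(e)  # assumed handling; the diff cuts off after the `except`
            continue
        supported_metrics.append(metric)
    return supported_metrics
```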
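The extractive QA branch also calls a `format_col_mapping` helper that this commit doesn't show. Going only by the in-line comment that the Hub YAML parser converts periods to hyphens, a hypothetical reconstruction would map hyphenated keys back to dotted column names; the real helper in utils.py may well differ:

```python
def format_col_mapping(col_mapping: dict) -> dict:
    # Hypothetical sketch: the Hub's YAML parser turns a key like
    # "answers.text" into "answers-text", so map hyphens back to periods
    # to recover nested column names. Not the repo's actual code.
    return {key.replace("-", "."): value for key, value in col_mapping.items()}
```

For a squad-style dataset this would turn `answers-text` back into `answers.text`, matching the nested `answers` feature of the dataset.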
requirements.txt CHANGED
```diff
@@ -1,6 +1,6 @@
 huggingface-hub<0.8
 python-dotenv
-streamlit==1.
+streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
 # Dataset specific deps
```
utils.py CHANGED
```diff
@@ -1,7 +1,7 @@
 from typing import Dict, Union
 
 import requests
-from huggingface_hub import HfApi, ModelFilter
+from huggingface_hub import HfApi, ModelFilter, dataset_info
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -55,9 +55,9 @@ def http_get(path: str, domain: str, token: str = None, params: dict = None) ->
 
 
 def get_metadata(dataset_name: str) -> Union[Dict, None]:
-    data =
-    if data
-        return data
+    data = dataset_info(dataset_name)
+    if data.cardData is not None and "train-eval-index" in data.cardData.keys():
+        return data.cardData["train-eval-index"]
     else:
         return None
 
```
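The rewritten `get_metadata` goes through `huggingface_hub.dataset_info` and returns the card's `train-eval-index` block when one exists. A short usage sketch; the dataset id here is purely illustrative:

```python
from utils import get_metadata

# `train-eval-index` arrives as parsed YAML (a list of dicts),
# or None when the dataset card doesn't define it.
metadata = get_metadata("emotion")  # illustrative dataset id
if metadata is not None:
    print(metadata[0]["col_mapping"])
```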