Merge pull request #22 from huggingface/speed-metric-caching
app.py
CHANGED
@@ -58,7 +58,7 @@ TASK_TO_DEFAULT_METRICS = {
 SUPPORTED_TASKS = list(TASK_TO_ID.keys())
 
 
-@st.
+@st.experimental_memo
 def get_supported_metrics():
     metrics = [metric.id for metric in list_metrics()]
     supported_metrics = []
@@ -104,9 +104,9 @@ st.markdown(
     Welcome to Hugging Face's automatic model evaluator! This application allows
     you to evaluate 🤗 Transformers
     [models](https://huggingface.co/models?library=transformers&sort=downloads)
-    across a wide variety of datasets on the Hub. Please select
-
-
+    across a wide variety of datasets on the Hub. Please select the dataset and
+    configuration below. The results of your evaluation will be displayed on the
+    [public
     leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
     """
 )
@@ -128,6 +128,17 @@ selected_dataset = st.selectbox(
 )
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
+# Check if selected dataset can be streamed
+is_valid_dataset = http_get(
+    path="/is-valid",
+    domain=DATASETS_PREVIEW_API,
+    params={"dataset": selected_dataset},
+).json()
+if is_valid_dataset["valid"] is False:
+    st.error(
+        """The dataset you selected is not currently supported. Open a \
+        [discussion](https://huggingface.co/spaces/autoevaluate/autoevaluate/discussions) for support."""
+    )
 
 metadata = get_metadata(selected_dataset)
 print(f"INFO -- Dataset metadata: {metadata}")
@@ -140,10 +151,19 @@ with st.expander("Advanced configuration"):
         "Select a task",
         SUPPORTED_TASKS,
         index=SUPPORTED_TASKS.index(metadata[0]["task_id"]) if metadata is not None else 0,
+        help="""Don't see your favourite task here? Open a \
+            [discussion](https://huggingface.co/spaces/autoevaluate/autoevaluate/discussions) to request it!""",
     )
     # Select config
     configs = get_dataset_config_names(selected_dataset)
-    selected_config = st.selectbox(
+    selected_config = st.selectbox(
+        "Select a config",
+        configs,
+        help="""Some datasets contain several sub-datasets, known as _configurations_. \
+            Select one to evaluate your models on. \
+            See the [docs](https://huggingface.co/docs/datasets/master/en/load_hub#configurations) for more details.
+        """,
+    )
 
     # Select splits
     splits_resp = http_get(
@@ -166,6 +186,7 @@ with st.expander("Advanced configuration"):
         "Select a split",
         split_names,
         index=split_names.index(eval_split) if eval_split is not None else 0,
+        help="Be wary when evaluating models on the `train` split.",
     )
 
     # Select columns
@@ -180,7 +201,11 @@ with st.expander("Advanced configuration"):
     ).json()
     col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
 
-    st.markdown("**Map your
+    st.markdown("**Map your dataset columns**")
+    st.markdown(
+        """The model evaluator uses a standardised set of column names for the input examples and labels. \
+        Please define the mapping between your dataset columns (right) and the standardised column names (left)."""
+    )
     col1, col2 = st.columns(2)
 
     # TODO: find a better way to layout these items
@@ -196,12 +221,12 @@ with st.expander("Advanced configuration"):
             st.markdown("`target` column")
         with col2:
             text_col = st.selectbox(
-                "This column should contain the text
+                "This column should contain the text to be classified",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "text")) if metadata is not None else 0,
             )
             target_col = st.selectbox(
-                "This column should contain the labels
+                "This column should contain the labels associated with the text",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "target")) if metadata is not None else 0,
             )
@@ -218,12 +243,12 @@ with st.expander("Advanced configuration"):
             st.markdown("`tags` column")
         with col2:
            tokens_col = st.selectbox(
-                "This column should contain the array of tokens",
+                "This column should contain the array of tokens to be classified",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "tokens")) if metadata is not None else 0,
             )
             tags_col = st.selectbox(
-                "This column should contain the labels
+                "This column should contain the labels associated with each part of the text",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "tags")) if metadata is not None else 0,
             )
@@ -240,12 +265,12 @@ with st.expander("Advanced configuration"):
             st.markdown("`target` column")
         with col2:
             text_col = st.selectbox(
-                "This column should contain the text
+                "This column should contain the text to be translated",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "source")) if metadata is not None else 0,
             )
             target_col = st.selectbox(
-                "This column should contain
+                "This column should contain the target translation",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "target")) if metadata is not None else 0,
             )
@@ -262,12 +287,12 @@ with st.expander("Advanced configuration"):
             st.markdown("`target` column")
         with col2:
             text_col = st.selectbox(
-                "This column should contain the text
+                "This column should contain the text to be summarized",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "text")) if metadata is not None else 0,
             )
             target_col = st.selectbox(
-                "This column should contain
+                "This column should contain the target summary",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "target")) if metadata is not None else 0,
             )
@@ -313,7 +338,7 @@ with st.expander("Advanced configuration"):
                 index=col_names.index(get_key(col_mapping, "answers.text")) if metadata is not None else 0,
             )
             answers_start_col = st.selectbox(
-                "This column should contain the indices in the context of the first character of each answers.text",
+                "This column should contain the indices in the context of the first character of each `answers.text`",
                 col_names,
                 index=col_names.index(get_key(col_mapping, "answers.answer_start")) if metadata is not None else 0,
             )
@@ -350,7 +375,7 @@ with st.form(key="form"):
     selected_models = st.multiselect(
         "Select the models you wish to evaluate",
         compatible_models,
-        help="""Don't see your model in this list? Add the dataset and task it was trained to the \
+        help="""Don't see your model in this list? Add the dataset and task it was trained on to the \
        [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)""",
     )
     print("INFO -- Selected models before filter:", selected_models)
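
For readers skimming the diff, the core of the "speed-metric-caching" change is the @st.experimental_memo decorator on get_supported_metrics(). The following is a minimal sketch of that pattern, assuming a Streamlit release that still ships st.experimental_memo and a huggingface_hub release that still exposes list_metrics; the function body here is illustrative, not the app's full implementation.

import streamlit as st
from huggingface_hub import list_metrics


@st.experimental_memo  # memoizes the return value across Streamlit reruns
def get_supported_metrics():
    # list_metrics() queries the Hugging Face Hub; with the memo decorator this
    # network call happens once per session instead of on every widget interaction.
    return [metric.id for metric in list_metrics()]


metrics = get_supported_metrics()  # first call fetches from the Hub; later reruns reuse the cached list

In Streamlit versions that provide this API, st.experimental_memo.clear() can be called to drop the cached entries and force a fresh fetch on the next rerun.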