Merge pull request #15 from huggingface/fix-app
Files changed:
- app.py: +20 -22
- evaluation.py: +7 -4
app.py
CHANGED
@@ -42,15 +42,9 @@ TASK_TO_ID = {
 TASK_TO_DEFAULT_METRICS = {
     "binary_classification": ["f1", "precision", "recall", "auc", "accuracy"],
     "multi_class_classification": [
-        "f1_macro",
-        "f1_micro",
-        "f1_weighted",
-        "precision_macro",
-        "precision_micro",
-        "precision_weighted",
-        "recall_macro",
-        "recall_micro",
-        "recall_weighted",
+        "f1",
+        "precision",
+        "recall",
         "accuracy",
     ],
     "entity_extraction": ["precision", "recall", "f1", "accuracy"],
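This hunk trims the multi-class defaults from nine averaged variants down to plain "f1", "precision" and "recall" (plus "accuracy"). The mapping is presumably what the app falls back to when a user picks no extra metrics; a small illustrative lookup follows, in which the task name, the user selection and the trimmed dict copy are assumptions rather than anything shown in the diff:

# Illustrative only: how a task's default metrics might be combined with user picks.
# Trimmed copy of the TASK_TO_DEFAULT_METRICS entry from app.py above.
TASK_TO_DEFAULT_METRICS = {
    "multi_class_classification": ["f1", "precision", "recall", "accuracy"],
}

selected_task = "multi_class_classification"      # hypothetical selection
user_selected_metrics = ["matthews_correlation"]  # hypothetical selection
metrics_to_run = TASK_TO_DEFAULT_METRICS[selected_task] + user_selected_metrics
print(metrics_to_run)  # ['f1', 'precision', 'recall', 'accuracy', 'matthews_correlation']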
@@ -72,6 +66,7 @@ def get_supported_metrics():
         except Exception as e:
             print(e)
             print("Skipping the following metric, which cannot load:", metric)
+            continue

         argspec = inspect.getfullargspec(metric_func.compute)
         if "references" in argspec.kwonlyargs and "predictions" in argspec.kwonlyargs:
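The added continue is the substantive fix in this hunk: when a metric fails to load, the loop previously fell through to inspect.getfullargspec(metric_func.compute) with an undefined or stale metric_func. Below is a minimal sketch of the assumed loop shape (the function signature, the metric_names argument, the use of evaluate.load, and the return value are assumptions, not taken from the diff):

import inspect

import evaluate  # assumption: metrics are loaded with the `evaluate` library


def get_supported_metrics(metric_names):
    """Sketch: keep only metrics that load and expose compute(references=..., predictions=...)."""
    supported = []
    for metric in metric_names:
        try:
            metric_func = evaluate.load(metric)
        except Exception as e:
            print(e)
            print("Skipping the following metric, which cannot load:", metric)
            continue  # without this, the lines below run with an undefined or stale metric_func

        argspec = inspect.getfullargspec(metric_func.compute)
        if "references" in argspec.kwonlyargs and "predictions" in argspec.kwonlyargs:
            supported.append(metric)
    return supported

Catching a broad Exception and skipping keeps a single broken metric from taking down the whole metric picker.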
@@ -307,9 +302,7 @@ with st.expander("Advanced configuration"):
             col_mapping[answers_text_col] = "answers.text"
             col_mapping[answers_start_col] = "answers.answer_start"

-
-
-    compatible_models = get_compatible_models(selected_task, selected_dataset)
+    st.markdown("**Select metrics**")
     st.markdown("The following metrics will be computed")
     html_string = " ".join(
         [
@@ -328,26 +321,31 @@ with st.form(key="form"):
     )
     st.info(
         "Note: user-selected metrics will be run with their default arguments from "
-        + "[here](https://github.com/huggingface/
+        + "[here](https://github.com/huggingface/evaluate/tree/main/metrics)"
     )

+with st.form(key="form"):
+
+    compatible_models = get_compatible_models(selected_task, selected_dataset)
+
     selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
     print("Selected models:", selected_models)

-    selected_models
-    selected_models
-
-
-
-
-
-
+    if len(selected_models) > 0:
+        selected_models = filter_evaluated_models(
+            selected_models,
+            selected_task,
+            selected_dataset,
+            selected_config,
+            selected_split,
+        )
+        print("Selected models:", selected_models)

     submit_button = st.form_submit_button("Make submission")

     if submit_button:
         if len(selected_models) > 0:
-            project_id = str(uuid.uuid4())
+            project_id = str(uuid.uuid4())
             payload = {
                 "username": AUTOTRAIN_USERNAME,
                 "proj_name": f"eval-project-{project_id}",
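The rebuilt form block also deduplicates work before submission: filter_evaluated_models appears to drop models that already have an evaluation for the selected task, dataset, config and split, and only the remainder goes into the AutoTrain payload. The diff only shows the call site; below is a hypothetical sketch of such a helper, reusing the hashing idea from evaluation.py further down (the EvaluationInfo fields and the stubbed get_evaluation_ids are assumptions, not the real implementations):

from dataclasses import dataclass


@dataclass(frozen=True)
class EvaluationInfo:
    # Hypothetical fields; the real class lives in evaluation.py and is not shown in this diff.
    task: str
    model: str
    dataset_name: str
    dataset_config: str
    dataset_split: str


def get_evaluation_ids():
    # Stub for the helper of the same name in evaluation.py, which collects ids of past evaluations.
    return []


def filter_evaluated_models(models, task, dataset, config, split):
    """Sketch: keep only models whose exact evaluation setup has not been run before."""
    existing_ids = set(get_evaluation_ids())
    return [
        model
        for model in models
        if hash(EvaluationInfo(task, model, dataset, config, split)) not in existing_ids
    ]

Hashing a frozen dataclass is one convenient way to get a stable identity for an evaluation setup; the real EvaluationInfo only needs to be hashable, as evaluation.py's hash(evaluation_info) shows.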
evaluation.py
CHANGED
@@ -15,10 +15,13 @@ class EvaluationInfo:


 def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
-    metadata = dataset_info.cardData["eval_info"]
-    metadata.pop("col_mapping", None)
-    evaluation_info = EvaluationInfo(**metadata)
-    return hash(evaluation_info)
+    if dataset_info.cardData is not None:
+        metadata = dataset_info.cardData["eval_info"]
+        metadata.pop("col_mapping", None)
+        evaluation_info = EvaluationInfo(**metadata)
+        return hash(evaluation_info)
+    else:
+        return None


 def get_evaluation_ids():
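With the guard in place, compute_evaluation_id returns None for dataset repos whose card carries no metadata instead of assuming eval_info is always present. A small usage sketch, assuming evaluation.py is importable as a module and using huggingface_hub to fetch the DatasetInfo; the repo id is a placeholder:

from huggingface_hub import HfApi

from evaluation import compute_evaluation_id  # assumes evaluation.py is on the import path

api = HfApi()
# Placeholder repo id; replace with a real dataset repository.
info = api.dataset_info("my-org/my-eval-dataset")
eval_id = compute_evaluation_id(info)
if eval_id is None:
    print("Dataset card has no metadata; nothing to hash.")
else:
    print("Evaluation id:", eval_id)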