Hash user metrics
- app.py +1 -0
- evaluation.py +11 -2
app.py CHANGED

@@ -433,6 +433,7 @@ with st.form(key="form"):
         selected_dataset,
         selected_config,
         selected_split,
+        selected_metrics,
     )
     print("INFO -- Selected models after filter:", selected_models)
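For orientation, the hunk above only shows the tail of the call that gains the new argument. A plausible reconstruction of the call site is sketched below; the first two arguments (stand-ins for the `models` and `task` parameters) and the `selected_task` name are assumptions, not part of the diff.

```python
# Hypothetical call site inside the `with st.form(key="form"):` block in app.py.
# Only the last four arguments and the print() line appear in the hunk; the rest is assumed.
selected_models = filter_evaluated_models(
    selected_models,    # assumed: models picked in the UI
    selected_task,      # assumed: task picked in the UI
    selected_dataset,
    selected_config,
    selected_split,
    selected_metrics,   # new in this commit: metrics now feed into the duplicate check
)
print("INFO -- Selected models after filter:", selected_models)
```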
evaluation.py CHANGED

@@ -12,12 +12,17 @@ class EvaluationInfo:
     dataset_name: str
     dataset_config: str
     dataset_split: str
+    metrics: set
 
 
 def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
     if dataset_info.cardData is not None:
         metadata = dataset_info.cardData["eval_info"]
         metadata.pop("col_mapping", None)
+        # TODO(lewtun): populate dataset cards with metric info
+        if "metrics" not in metadata:
+            metadata["metrics"] = frozenset()
+        metadata["metrics"] = frozenset(metadata["metrics"])
         evaluation_info = EvaluationInfo(**metadata)
         return hash(evaluation_info)
     else:
@@ -30,7 +35,7 @@ def get_evaluation_ids():
     return [compute_evaluation_id(dset) for dset in evaluation_datasets]
 
 
-def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
+def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
     evaluation_ids = get_evaluation_ids()
 
     for idx, model in enumerate(models):
@@ -40,10 +45,14 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
             dataset_name=dataset_name,
             dataset_config=dataset_config,
             dataset_split=dataset_split,
+            metrics=frozenset(metrics),
         )
         candidate_id = hash(evaluation_info)
         if candidate_id in evaluation_ids:
-            st.info(
+            st.info(
+                f"Model `{model}` has already been evaluated on this configuration. \
+                This model will be excluded from the evaluation job..."
+            )
             models.pop(idx)
 
     return models
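The reason `metrics` is coerced to a `frozenset` on both sides (when reading the dataset card and when building the candidate `EvaluationInfo`) is that `hash()` on a frozen dataclass hashes the tuple of its field values, and a plain `set` is unhashable. A minimal sketch of the idea, assuming `EvaluationInfo` is declared with `@dataclass(frozen=True)` (implied but not shown in the hunk) and using only the fields visible in the diff:

```python
from dataclasses import dataclass


# Stripped-down stand-in for EvaluationInfo; the real class presumably has more
# fields (such as task and model) that are not visible in this hunk.
@dataclass(frozen=True)
class EvaluationInfo:
    dataset_name: str
    dataset_config: str
    dataset_split: str
    metrics: set  # stored as a frozenset so the instance stays hashable


a = EvaluationInfo("imdb", "plain_text", "test", frozenset({"accuracy", "f1"}))
b = EvaluationInfo("imdb", "plain_text", "test", frozenset({"f1", "accuracy"}))

# frozensets hash independently of insertion order, so the same metric
# selection always yields the same evaluation id.
assert hash(a) == hash(b)

# With a plain set the hash would fail:
#   hash(EvaluationInfo("imdb", "plain_text", "test", {"accuracy"}))
#   -> TypeError: unhashable type: 'set'
```

Because the hash is order-insensitive, two submissions that select the same metrics in a different order resolve to the same id in `get_evaluation_ids()` and are filtered out as duplicates.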