Tristan Thrush committed
Commit bb28608 · Parent(s): fe77dfe

added selection of verified results

app.py CHANGED
@@ -10,9 +10,12 @@ from os.path import exists
 import threading
 
 
-def get_model_ids():
+def get_model_ids(author=None):
     api = HfApi()
-    models = api.list_models(filter="model-index")
+    if author is None:
+        models = api.list_models(filter="model-index")
+    else:
+        models = api.list_models(filter="model-index", author=author)
     model_ids = [x.modelId for x in models]
     return model_ids
 
@@ -42,24 +45,39 @@ def parse_metric_value(value):
     return value
 
 
-def parse_metrics_rows(meta):
+def parse_metrics_rows(meta, from_autoeval=False):
     if not isinstance(meta["model-index"], list) or len(meta["model-index"]) == 0 or "results" not in meta["model-index"][0]:
         return None
     for result in meta["model-index"][0]["results"]:
         if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
             continue
         dataset = result["dataset"]["type"]
-        …
+        row = {"dataset": dataset, "split": None, "config": None, "verified": from_autoeval}
+        if "split" in result["dataset"]:
+            row["split"] = result["dataset"]["split"]
+        if "config" in result["dataset"]:
+            row["config"] = result["dataset"]["config"]
         for metric in result["metrics"]:
             type = metric["type"].lower().strip()
+            if type in ("dataset", "split", "config", "verified"):
+                # Metrics are not allowed to be named "dataset", "split", "config", or "verified".
+                continue
             value = parse_metric_value(metric.get("value", None))
             if value is None:
                 continue
-            if type …
+            if type in row:
+                new_metric_better = value < row[type] if type in ascending_metrics else value > row[type]
+            if type not in row or new_metric_better:
+                # overwrite the metric if the new value is better.
+
+                if from_autoeval:
+                    # if the metric is from autoeval, only include it in the leaderboard if
+                    # it is a verified metric. Unverified metrics are already included
+                    # in the leaderboard from the unverified model card.
+                    if "verified" in metric and metric["verified"]:
+                        row[type] = value
+                else:
+                    row[type] = value
         yield row
 
 @st.cache(ttl=3600)
@@ -68,11 +86,12 @@ def get_data_wrapper():
     def get_data():
         data = []
         model_ids = get_model_ids()
+        model_ids_from_autoeval = set(get_model_ids(author="autoevaluate"))
         for model_id in tqdm(model_ids):
            meta = get_metadata(model_id)
            if meta is None:
                continue
-           for row in parse_metrics_rows(meta):
+           for row in parse_metrics_rows(meta, from_autoeval=model_id in model_ids_from_autoeval):
                if row is None:
                    continue
                row["model_id"] = model_id
@@ -108,6 +127,10 @@ if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
         default_dataset = query_params["dataset"][0]
 
+only_verified_results = st.sidebar.checkbox(
+    "Filter for Verified Results",
+)
+
 dataset = st.sidebar.selectbox(
     "Dataset",
     selectable_datasets,
@@ -118,15 +141,19 @@ st.experimental_set_query_params(**{"dataset": [dataset]})
 dataset_df = dataframe[dataframe.dataset == dataset]
 dataset_df = dataset_df.dropna(axis="columns", how="all")
 
+if only_verified_results:
+    dataset_df = dataset_df[dataset_df["verified"]]
+
 selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset"), dataset_df.columns))
+
+dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
+dataset_df = dataset_df.dropna(thresh=2)  # Want at least two non-na values (one for model_id and one for a metric).
+
 sorting_metric = st.sidebar.radio(
     "Sorting Metric",
     selectable_metrics,
 )
 
-dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
-dataset_df = dataset_df.dropna(thresh=2)  # Want at least two non-na values (one for model_id and one for a metric).
-
 st.markdown(
     "Please click on the model's name to be redirected to its model card."
 )
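A note for readers of the parse_metrics_rows hunk: the heart of the change is the per-metric preference rule. The sketch below restates it standalone; keep_best is a hypothetical helper and the contents of ascending_metrics are assumed (app.py defines the real set elsewhere; only its "lower is better" role matters here).

# Standalone sketch of the preference rule; keep_best and the contents of
# ascending_metrics are illustrative assumptions, not code from this commit.
ascending_metrics = {"loss", "wer"}  # metrics where lower values are better

def keep_best(row, metric_type, value, from_autoeval, metric_verified):
    if metric_type in row:
        # Replace an existing value only if the new one is better.
        new_metric_better = (
            value < row[metric_type]
            if metric_type in ascending_metrics
            else value > row[metric_type]
        )
    else:
        new_metric_better = True
    if new_metric_better:
        if from_autoeval:
            # Autoeval-sourced values only count when verified, mirroring the hunk above.
            if metric_verified:
                row[metric_type] = value
        else:
            row[metric_type] = value

row = {}
keep_best(row, "accuracy", 0.88, from_autoeval=False, metric_verified=False)
keep_best(row, "accuracy", 0.91, from_autoeval=False, metric_verified=False)  # better: overwrites
keep_best(row, "wer", 0.20, from_autoeval=True, metric_verified=True)
keep_best(row, "wer", 0.30, from_autoeval=True, metric_verified=True)  # worse: ignored
print(row)  # {'accuracy': 0.91, 'wer': 0.2}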
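The shape of the metadata parse_metrics_rows consumes, and of the row it yields, may be easier to see on a toy payload. Everything below is hypothetical example data; only the key names come from the checks in the function.

# Hypothetical model-index metadata in the shape the function expects.
meta = {
    "model-index": [{
        "results": [{
            "dataset": {"type": "glue", "config": "mnli", "split": "validation"},
            "metrics": [{"type": "accuracy", "value": 0.87, "verified": True}],
        }]
    }]
}
# Called with from_autoeval=True, the single yielded row would be:
# {"dataset": "glue", "split": "validation", "config": "mnli",
#  "verified": True, "accuracy": 0.87}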
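The get_model_ids change reuses the HfApi call already in the file, once unfiltered and once restricted to the autoevaluate author. A quick sketch of how the two queries relate (results depend on what is on the Hub at query time):

from huggingface_hub import HfApi

api = HfApi()
# All models that publish eval results via model-index metadata.
all_ids = {m.modelId for m in api.list_models(filter="model-index")}
# The subset uploaded by the autoevaluate author, which can carry verified metrics.
autoeval_ids = {m.modelId for m in api.list_models(filter="model-index", author="autoevaluate")}
print(len(all_ids), len(autoeval_ids))
assert autoeval_ids <= all_ids  # every autoevaluate model is in the full listing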
row["model_id"] = model_id
|
|
|
|
| 127 |
if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
|
| 128 |
default_dataset = query_params["dataset"][0]
|
| 129 |
|
| 130 |
+
only_verified_results = st.sidebar.checkbox(
|
| 131 |
+
"Filter for Verified Results",
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
dataset = st.sidebar.selectbox(
|
| 135 |
"Dataset",
|
| 136 |
selectable_datasets,
|
|
|
|
| 141 |
dataset_df = dataframe[dataframe.dataset == dataset]
|
| 142 |
dataset_df = dataset_df.dropna(axis="columns", how="all")
|
| 143 |
|
| 144 |
+
if only_verified_results:
|
| 145 |
+
dataset_df = dataset_df[dataset_df["verified"]]
|
| 146 |
+
|
| 147 |
selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset"), dataset_df.columns))
|
| 148 |
+
|
| 149 |
+
dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
|
| 150 |
+
dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
|
| 151 |
+
|
| 152 |
sorting_metric = st.sidebar.radio(
|
| 153 |
"Sorting Metric",
|
| 154 |
selectable_metrics,
|
| 155 |
)
|
| 156 |
|
|
|
|
|
|
|
|
|
|
| 157 |
st.markdown(
|
| 158 |
"Please click on the model's name to be redirected to its model card."
|
| 159 |
)
|
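Finally, the new sidebar checkbox reduces to a plain boolean mask on the verified column. A toy illustration with made-up rows:

import pandas as pd

# Made-up leaderboard rows; only the column names mirror the diff.
dataframe = pd.DataFrame({
    "model_id": ["org/model-a", "autoevaluate/model-b"],
    "accuracy": [0.90, 0.87],
    "verified": [False, True],
})
only_verified_results = True  # what the sidebar checkbox controls
if only_verified_results:
    dataframe = dataframe[dataframe["verified"]]
print(dataframe)  # only the verified row remains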