Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
d2471f2
1
Parent(s):
1072829
[Fix, Add] fix bug with metric names
Browse files
- app.py +2 -1
- src/similarity.py +3 -3
app.py
CHANGED
|
@@ -118,7 +118,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
| 118 |
model_dropdown = gr.Dropdown(
|
| 119 |
choices=get_leaderboard_models_cached(),
|
| 120 |
label="Select Models",
|
| 121 |
-
value=["HuggingFaceTB/SmolLM2-1.7B-Instruct", "
|
| 122 |
multiselect=True,
|
| 123 |
filterable=True,
|
| 124 |
allow_custom_value=False,
|
|
@@ -158,6 +158,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
| 158 |
- **Models**: Open LLM Leaderboard models \n
|
| 159 |
- Every model evaluation is gated on Hugging Face and access has to be requested. \n
|
| 160 |
- We requested access for the most popular models, but some may be missing. \n
|
|
|
|
| 161 |
- **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
|
| 162 |
|
| 163 |
if __name__ == "__main__":
|
|
|
|
| 118 |
model_dropdown = gr.Dropdown(
|
| 119 |
choices=get_leaderboard_models_cached(),
|
| 120 |
label="Select Models",
|
| 121 |
+
value=["HuggingFaceTB/SmolLM2-1.7B-Instruct", "tiiuae/Falcon3-7B-Instruct", "google/gemma-2-27b-it", "Qwen/Qwen2.5-72B-Instruct"],
|
| 122 |
multiselect=True,
|
| 123 |
filterable=True,
|
| 124 |
allow_custom_value=False,
|
|
|
|
| 158 |
- **Models**: Open LLM Leaderboard models \n
|
| 159 |
- Every model evaluation is gated on Hugging Face and access has to be requested. \n
|
| 160 |
- We requested access for the most popular models, but some may be missing. \n
|
| 161 |
+
- Notably, loading data is not possible for many meta-llama and gemma models.
|
| 162 |
- **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
|
| 163 |
|
| 164 |
if __name__ == "__main__":
|
src/similarity.py
CHANGED
|
@@ -31,9 +31,9 @@ def compute_similarity(metric: Metrics, outputs_a: list[np.array], outputs_b: li
|
|
| 31 |
|
| 32 |
def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
|
| 33 |
# Select chosen metric
|
| 34 |
-
if metric_name == "
|
| 35 |
metric = CAPA()
|
| 36 |
-
elif metric_name == "
|
| 37 |
metric = CAPA(prob=False)
|
| 38 |
# Convert probabilities to one-hot
|
| 39 |
probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
|
|
@@ -51,7 +51,7 @@ def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]],
|
|
| 51 |
gt_b = gts[j]
|
| 52 |
|
| 53 |
# Format softmax outputs
|
| 54 |
-
if metric_name == "
|
| 55 |
outputs_a = [softmax(logits) for logits in outputs_a]
|
| 56 |
outputs_b = [softmax(logits) for logits in outputs_b]
|
| 57 |
|
|
|
|
| 31 |
|
| 32 |
def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
|
| 33 |
# Select chosen metric
|
| 34 |
+
if metric_name == "CAPA":
|
| 35 |
metric = CAPA()
|
| 36 |
+
elif metric_name == "CAPA (det.)":
|
| 37 |
metric = CAPA(prob=False)
|
| 38 |
# Convert probabilities to one-hot
|
| 39 |
probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
|
|
|
|
| 51 |
gt_b = gts[j]
|
| 52 |
|
| 53 |
# Format softmax outputs
|
| 54 |
+
if metric_name == "CAPA":
|
| 55 |
outputs_a = [softmax(logits) for logits in outputs_a]
|
| 56 |
outputs_b = [softmax(logits) for logits in outputs_b]
|
| 57 |
|