Commit 44c2a20 · ready hyperlinks for leaderboard
Parent(s): ce4dda5

Files changed:
- utils/arena_df_leaderboard.csv +9 -0
- utils/leaderboard.py +31 -5
- utils/models.py +2 -2
utils/arena_df_leaderboard.csv ADDED

@@ -0,0 +1,9 @@
+model,wins,losses,ties
+Model Alpha,0,0,0
+Model Beta,0,0,0
+Model Delta (Refusal Specialist),0,0,0
+Model Gamma,0,0,0
+Qwen2.5-1.5b-Instruct,1,1,0
+Llama-3.2-1b-Instruct,0,1,0
+Qwen2.5-3b-Instruct,1,0,0
+Llama-3.2-3b-Instruct,0,0,0
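The new CSV seeds the arena leaderboard with per-model win/loss/tie counts. As a rough illustration of how such a file can be consumed, the sketch below loads it with pandas and derives a win rate; the column names come from the file above, but the helper name and the win_rate column are assumptions for illustration, not part of this commit.

import pandas as pd

# Illustrative only: load the seed arena results added in this commit.
def load_leaderboard(path: str = "utils/arena_df_leaderboard.csv") -> pd.DataFrame:
    df = pd.read_csv(path)
    # Guard against division by zero for models that have not played any match yet.
    total_games = (df["wins"] + df["losses"] + df["ties"]).clip(lower=1)
    df["win_rate"] = df["wins"] / total_games
    return df.sort_values(["wins", "win_rate"], ascending=False).reset_index(drop=True)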
utils/leaderboard.py CHANGED

@@ -2,6 +2,7 @@ import os
 import pandas as pd
 import math
 from datetime import datetime
+from .models import models
 
 # Default K-factor (determines how much a single match affects ratings)
 DEFAULT_K_FACTOR = 32
@@ -9,12 +10,37 @@ DEFAULT_K_FACTOR = 32
 # Default starting Elo
 DEFAULT_ELO = 1500
 
+def prepare_url(model_dict: dict):
+    """
+    Prepare the URL for the model based on its name.
+
+    Parameters:
+    - model_dict: Dictionary containing model information
+
+    Returns:
+    - URL string for the model
+    """
+    url_dict = {}
+    # Extract the model name from the dictionary
+    model_names = model_dict.keys()
+    for name in model_names:
+        half_url = model_dict[name]
+
+        # Construct the URL using the model name
+        url = f"https://huggingface.co/{half_url}"
+        url_dict[name] = url
+
+    return url_dict
+
+
 # Mapping of model names to their Hugging Face URLs
-model_to_hf = {
-    "Qwen2.5-1.5b-Instruct": "https://huggingface.co/qwen/qwen2.5-1.5b-instruct",
-    "Qwen2.5-3b-Instruct": "https://huggingface.co/qwen/qwen2.5-3b-instruct",
-    # Add more models and their HF links here
-}
+# model_to_hf = {
+#     "Qwen2.5-1.5b-Instruct": "https://huggingface.co/qwen/qwen2.5-1.5b-instruct",
+#     "Qwen2.5-3b-Instruct": "https://huggingface.co/qwen/qwen2.5-3b-instruct",
+#     # Add more models and their HF links here
+# }
+
+model_to_hf = prepare_url(models)
 
 def calculate_elo_changes(winner_rating, loser_rating, k_factor=DEFAULT_K_FACTOR, draw=False):
     """
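With this change, model_to_hf is no longer maintained by hand; it is generated from the models dict in utils/models.py. A small usage sketch follows, assuming utils is importable as a package (the module itself uses relative imports); the printed URL follows from the f-string in prepare_url, though the exact entries depend on what models currently contains.

from utils.models import models
from utils.leaderboard import prepare_url

# Each display name is mapped to a full Hugging Face URL by prefixing
# the repo id (e.g. "qwen/qwen2.5-1.5b-instruct") with https://huggingface.co/.
model_to_hf = prepare_url(models)
print(model_to_hf["Qwen2.5-1.5b-Instruct"])
# Expected: https://huggingface.co/qwen/qwen2.5-1.5b-instruct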
utils/models.py CHANGED

@@ -13,8 +13,8 @@ from .prompts import format_rag_prompt
 
 models = {
     "Qwen2.5-1.5b-Instruct": "qwen/qwen2.5-1.5b-instruct",
-    "Qwen2.5-3b-Instruct": "qwen/qwen2.5-3b-instruct", # remove gated for now
-    "Llama-3.2-3b-Instruct": "meta-llama/llama-3.2-3b-instruct",
+    #"Qwen2.5-3b-Instruct": "qwen/qwen2.5-3b-instruct", # remove gated for now
+    #"Llama-3.2-3b-Instruct": "meta-llama/llama-3.2-3b-instruct",
     "Llama-3.2-1b-Instruct": "meta-llama/llama-3.2-1b-instruct",
     "Gemma-3-1b-it" : "google/gemma-3-1b-it",
     #"Bitnet-b1.58-2B-4T": "microsoft/bitnet-b1.58-2B-4T",