Commit c5183c8
Parent(s): 491e00d

add a ton of models and update arena dataset

Files changed:
- utils/arena_df.csv (+0 -0)
- utils/models.py (+13 -2)
utils/arena_df.csv
CHANGED

The diff for this file is too large to render. See raw diff.
utils/models.py
CHANGED
@@ -18,7 +18,14 @@ models = {
     "Phi-4-mini-instruct": "microsoft/phi-4-mini-instruct",
     #"Cogito-v1-preview-llama-3b": "deepcogito/cogito-v1-preview-llama-3b",
     "IBM Granite-3.3-2b-instruct": "ibm-granite/granite-3.3-2b-instruct",
-    "Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T"
+    #"Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T",
+    "MiniCPM3-RAG-LoRA": "openbmb/MiniCPM3-RAG-LoRA",
+    "Qwen3-0.6b": "qwen/qwen3-0.6b",
+    "Qwen3-1.7b": "qwen/qwen3-1.7b",
+    "Qwen3-4b": "qwen/qwen3-4b",
+    "SmolLM2-1.7b-Instruct": "huggingfacetb/smolllm2-1.7b-instruct",
+    "EXAONE-3.5-2.4B-instruct": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
+    "OLMo-2-1B-Instruct": "allenai/OLMo-2-0425-1B-Instruct",
 
 }
 
@@ -94,6 +101,10 @@ def run_inference(model_name, context, question):
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     result = ""
+    model_kwargs = {}  # make sure qwen3 doesn't use thinking
+    if "qwen3" in model_name.lower():  # Making it case-insensitive and checking for substring
+        print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
+        model_kwargs["enable_thinking"] = False
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", token=True)
@@ -114,10 +125,10 @@ def run_inference(model_name, context, question):
            model=model_name,
            tokenizer=tokenizer,
            device_map='auto',
-           max_length=512,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
+           model_kwargs=model_kwargs,
        )
 
        text_input = format_rag_prompt(question, context, accepts_sys)
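The main change above is the expanded `models` dict mapping display names to Hub repo ids. As a side note, a small sanity check along the following lines can confirm that each newly added repo id actually resolves on the Hugging Face Hub before it is exposed in the arena; this is an illustrative sketch, not part of the commit, and the entries shown are copied from the diff above.

# Hypothetical sanity check (not part of the commit): verify that each new repo id resolves on the Hub.
from huggingface_hub import model_info
from huggingface_hub.utils import RepositoryNotFoundError

new_models = {
    "MiniCPM3-RAG-LoRA": "openbmb/MiniCPM3-RAG-LoRA",
    "Qwen3-0.6b": "qwen/qwen3-0.6b",
    "Qwen3-1.7b": "qwen/qwen3-1.7b",
    "Qwen3-4b": "qwen/qwen3-4b",
    "SmolLM2-1.7b-Instruct": "huggingfacetb/smolllm2-1.7b-instruct",
    "EXAONE-3.5-2.4B-instruct": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
    "OLMo-2-1B-Instruct": "allenai/OLMo-2-0425-1B-Instruct",
}

for display_name, repo_id in new_models.items():
    try:
        model_info(repo_id)  # raises RepositoryNotFoundError if the repo id is misspelled or missing
        print(f"ok       {display_name}: {repo_id}")
    except RepositoryNotFoundError:
        print(f"MISSING  {display_name}: {repo_id}")

A check like this catches misspelled or moved repo ids before they surface as load errors inside run_inference.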
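On the new `enable_thinking` handling: the diff collects the flag in `model_kwargs` and hands it to the text-generation pipeline. For comparison, the sketch below shows the route documented on the Qwen3 model cards, where `enable_thinking=False` is passed to the tokenizer's chat template when the prompt is built. The repo id and message are placeholders; this is an illustration, not the committed code.

# Illustrative sketch (assumption, not the committed code): disable Qwen3 "thinking"
# at prompt-construction time via the chat template.
from transformers import AutoTokenizer

repo_id = "Qwen/Qwen3-0.6B"  # placeholder repo id for illustration
tokenizer = AutoTokenizer.from_pretrained(repo_id)

messages = [{"role": "user", "content": "Answer using only the provided context."}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # ask the template to skip the <think>...</think> reasoning block
)
print(prompt)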