Commit c5183c8
Parent(s): 491e00d

add a ton of models and update arena dataset

Files changed:
- utils/arena_df.csv (+0 -0)
- utils/models.py (+13 -2)
utils/arena_df.csv
CHANGED

The diff for this file is too large to render. See raw diff.
utils/models.py
CHANGED
@@ -18,7 +18,14 @@ models = {
     "Phi-4-mini-instruct": "microsoft/phi-4-mini-instruct",
     #"Cogito-v1-preview-llama-3b": "deepcogito/cogito-v1-preview-llama-3b",
     "IBM Granite-3.3-2b-instruct": "ibm-granite/granite-3.3-2b-instruct",
-    "Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T"
+    #"Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T",
+    "MiniCPM3-RAG-LoRA": "openbmb/MiniCPM3-RAG-LoRA",
+    "Qwen3-0.6b": "qwen/qwen3-0.6b",
+    "Qwen3-1.7b": "qwen/qwen3-1.7b",
+    "Qwen3-4b": "qwen/qwen3-4b",
+    "SmolLM2-1.7b-Instruct": "huggingfacetb/smolllm2-1.7b-instruct",
+    "EXAONE-3.5-2.4B-instruct": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
+    "OLMo-2-1B-Instruct": "allenai/OLMo-2-0425-1B-Instruct",
 
 }
 
@@ -94,6 +101,10 @@ def run_inference(model_name, context, question):
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     result = ""
+    model_kwargs = {}  # make sure qwen3 doesn't use thinking
+    if "qwen3" in model_name.lower():  # Making it case-insensitive and checking for substring
+        print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
+        model_kwargs["enable_thinking"] = False
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", token=True)
@@ -114,10 +125,10 @@ def run_inference(model_name, context, question):
            model=model_name,
            tokenizer=tokenizer,
            device_map='auto',
-           max_length=512,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
+           model_kwargs=model_kwargs,
        )
 
        text_input = format_rag_prompt(question, context, accepts_sys)
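The main change above is the expanded `models` dict mapping display names to Hub repo ids. As a side note, a small sanity check along the following lines can confirm that each newly added repo id actually resolves on the Hugging Face Hub before it is exposed in the arena; this is an illustrative sketch, not part of the commit, and the entries shown are copied from the diff above.

# Hypothetical sanity check (not part of the commit): verify that each new repo id resolves on the Hub.
from huggingface_hub import model_info
from huggingface_hub.utils import RepositoryNotFoundError

new_models = {
    "MiniCPM3-RAG-LoRA": "openbmb/MiniCPM3-RAG-LoRA",
    "Qwen3-0.6b": "qwen/qwen3-0.6b",
    "Qwen3-1.7b": "qwen/qwen3-1.7b",
    "Qwen3-4b": "qwen/qwen3-4b",
    "SmolLM2-1.7b-Instruct": "huggingfacetb/smolllm2-1.7b-instruct",
    "EXAONE-3.5-2.4B-instruct": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
    "OLMo-2-1B-Instruct": "allenai/OLMo-2-0425-1B-Instruct",
}

for display_name, repo_id in new_models.items():
    try:
        model_info(repo_id)  # raises RepositoryNotFoundError if the repo id is misspelled or missing
        print(f"ok       {display_name}: {repo_id}")
    except RepositoryNotFoundError:
        print(f"MISSING  {display_name}: {repo_id}")

A check like this catches misspelled or moved repo ids before they surface as load errors inside run_inference.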
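On the new `enable_thinking` handling: the diff collects the flag in `model_kwargs` and hands it to the text-generation pipeline. For comparison, the sketch below shows the route documented on the Qwen3 model cards, where `enable_thinking=False` is passed to the tokenizer's chat template when the prompt is built. The repo id and message are placeholders; this is an illustration, not the committed code.

# Illustrative sketch (assumption, not the committed code): disable Qwen3 "thinking"
# at prompt-construction time via the chat template.
from transformers import AutoTokenizer

repo_id = "Qwen/Qwen3-0.6B"  # placeholder repo id for illustration
tokenizer = AutoTokenizer.from_pretrained(repo_id)

messages = [{"role": "user", "content": "Answer using only the provided context."}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # ask the template to skip the <think>...</think> reasoning block
)
print(prompt)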