Spaces:
Running
Running
make qwen-4b default
Browse files
app.py
CHANGED
|
@@ -59,7 +59,10 @@ MODELS = {
|
|
| 59 |
# "repo_id": "unsloth/gpt-oss-20b-BF16",
|
| 60 |
# "description": "A 20B-parameter open-source GPT-style language model quantized to INT4 using AutoRound, with FP8 key-value cache for efficient inference. Optimized for performance and memory efficiency on Intel hardware while maintaining strong language generation capabilities."
|
| 61 |
# },
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
| 63 |
"Apriel-1.5-15b-Thinker": {
|
| 64 |
"repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker",
|
| 65 |
"description": "Multimodal reasoning model with 15B parameters, trained via extensive mid-training on text and image data, and fine-tuned only on text (no image SFT). Achieves competitive performance on reasoning benchmarks like Artificial Analysis (score: 52), Tau2 Bench Telecom (68), and IFBench (62). Supports both text and image understanding, fits on a single GPU, and includes structured reasoning output with tool and function calling capabilities."
|
|
@@ -99,10 +102,7 @@ MODELS = {
|
|
| 99 |
"repo_id": "Qwen/Qwen3-4B",
|
| 100 |
"description": "Dense causal language model with 4.0 B total parameters (3.6 B non-embedding), 36 layers, 32 query heads & 8 KV heads, native 32 768-token context (extendable to 131 072 via YaRN), balanced mid-range capacity & long-context reasoning."
|
| 101 |
},
|
| 102 |
-
|
| 103 |
-
"repo_id": "Qwen/Qwen3-4B-Instruct-2507",
|
| 104 |
-
"description": "Updated non-thinking instruct variant of Qwen3-4B with 4.0B parameters, featuring significant improvements in instruction following, logical reasoning, multilingualism, and 256K long-context understanding. Strong performance across knowledge, coding, alignment, and agent benchmarks."
|
| 105 |
-
},
|
| 106 |
"Gemma-3-4B-IT": {
|
| 107 |
"repo_id": "unsloth/gemma-3-4b-it",
|
| 108 |
"description": "Gemma-3-4B-IT"
|
|
|
|
| 59 |
# "repo_id": "unsloth/gpt-oss-20b-BF16",
|
| 60 |
# "description": "A 20B-parameter open-source GPT-style language model quantized to INT4 using AutoRound, with FP8 key-value cache for efficient inference. Optimized for performance and memory efficiency on Intel hardware while maintaining strong language generation capabilities."
|
| 61 |
# },
|
| 62 |
+
"Qwen3-4B-Instruct-2507": {
|
| 63 |
+
"repo_id": "Qwen/Qwen3-4B-Instruct-2507",
|
| 64 |
+
"description": "Updated non-thinking instruct variant of Qwen3-4B with 4.0B parameters, featuring significant improvements in instruction following, logical reasoning, multilingualism, and 256K long-context understanding. Strong performance across knowledge, coding, alignment, and agent benchmarks."
|
| 65 |
+
},
|
| 66 |
"Apriel-1.5-15b-Thinker": {
|
| 67 |
"repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker",
|
| 68 |
"description": "Multimodal reasoning model with 15B parameters, trained via extensive mid-training on text and image data, and fine-tuned only on text (no image SFT). Achieves competitive performance on reasoning benchmarks like Artificial Analysis (score: 52), Tau2 Bench Telecom (68), and IFBench (62). Supports both text and image understanding, fits on a single GPU, and includes structured reasoning output with tool and function calling capabilities."
|
|
|
|
| 102 |
"repo_id": "Qwen/Qwen3-4B",
|
| 103 |
"description": "Dense causal language model with 4.0 B total parameters (3.6 B non-embedding), 36 layers, 32 query heads & 8 KV heads, native 32 768-token context (extendable to 131 072 via YaRN), balanced mid-range capacity & long-context reasoning."
|
| 104 |
},
|
| 105 |
+
|
|
|
|
|
|
|
|
|
|
| 106 |
"Gemma-3-4B-IT": {
|
| 107 |
"repo_id": "unsloth/gemma-3-4b-it",
|
| 108 |
"description": "Gemma-3-4B-IT"
|