Update app.py
app.py CHANGED
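This commit comments out three entries in the MODELS registry: the two FP8 Qwen3-30B-A3B variants (Thinking and Instruct) and AI21-Jamba-Reasoning-3B. The entries stay in the file as reference comments, matching the existing pattern used for the 235B model, so they can be re-enabled later.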
@@ -27,14 +27,14 @@ cancel_event = threading.Event()
 # ------------------------------
 MODELS = {
     # ~30.5B total parameters (MoE: 3.3B activated)
-    "Qwen3-30B-A3B-Thinking-2507-FP8": {
-        "repo_id": "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8",
-        "description": "FP8-quantized MoE model with 30.5B total parameters (3.3B activated), 128 experts (8 activated), 48 layers, and native 262,144-token context. Optimized for complex reasoning tasks with enhanced thinking capabilities in mathematics, coding, science, and agent benchmarks. Supports only thinking mode; includes automatic reasoning delimiters."
-    },
-    "Qwen3-30B-A3B-Instruct-2507-FP8": {
-        "repo_id": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8",
-        "description": "FP8-quantized instruct-tuned variant of Qwen3-30B-A3B (30.5B total params, 3.3B activated), featuring strong general capabilities in instruction following, tool usage, text generation, and 256K long-context understanding. Ideal for agentic and multi-turn dialogue applications."
-    },
+    # "Qwen3-30B-A3B-Thinking-2507-FP8": {
+    #     "repo_id": "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8",
+    #     "description": "FP8-quantized MoE model with 30.5B total parameters (3.3B activated), 128 experts (8 activated), 48 layers, and native 262,144-token context. Optimized for complex reasoning tasks with enhanced thinking capabilities in mathematics, coding, science, and agent benchmarks. Supports only thinking mode; includes automatic reasoning delimiters."
+    # },
+    # "Qwen3-30B-A3B-Instruct-2507-FP8": {
+    #     "repo_id": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8",
+    #     "description": "FP8-quantized instruct-tuned variant of Qwen3-30B-A3B (30.5B total params, 3.3B activated), featuring strong general capabilities in instruction following, tool usage, text generation, and 256K long-context understanding. Ideal for agentic and multi-turn dialogue applications."
+    # },
 
     # ~235B total parameters (MoE: 22B activated) — included for reference if added later
     # "Qwen3-235B-A22B-Thinking": { ... },
@@ -95,10 +95,10 @@ MODELS = {
     },
 
     # ~3B
-    "AI21-Jamba-Reasoning-3B": {
-        "repo_id": "ai21labs/AI21-Jamba-Reasoning-3B",
-        "description": "A compact 3B hybrid Transformer–Mamba reasoning model with 256K context length, strong intelligence benchmark scores (61% MMLU-Pro, 52% IFBench), and efficient inference suitable for edge and datacenter use. Outperforms Gemma-3 4B and Llama-3.2 3B despite smaller size."
-    },
+    # "AI21-Jamba-Reasoning-3B": {
+    #     "repo_id": "ai21labs/AI21-Jamba-Reasoning-3B",
+    #     "description": "A compact 3B hybrid Transformer–Mamba reasoning model with 256K context length, strong intelligence benchmark scores (61% MMLU-Pro, 52% IFBench), and efficient inference suitable for edge and datacenter use. Outperforms Gemma-3 4B and Llama-3.2 3B despite smaller size."
+    # },
     "Qwen2.5-Taiwan-3B-Reason-GRPO": {
         "repo_id": "benchang1110/Qwen2.5-Taiwan-3B-Reason-GRPO",
         "description": "Qwen2.5-Taiwan model with 3 B parameters, Reason-GRPO fine-tuned"
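For context on how a registry like this is typically consumed, here is a minimal sketch: the display name selected in the UI is resolved to its repo_id, which is then handed to the loader. The load_model helper below is illustrative only and assumes the Space loads models with transformers; app.py's actual loading code is not shown in this diff.

# Illustrative sketch only (not from app.py): resolving a MODELS entry
# to a Hugging Face Hub repo and loading it with transformers.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {
    # The entry left active in the ~3B tier after this commit:
    "Qwen2.5-Taiwan-3B-Reason-GRPO": {
        "repo_id": "benchang1110/Qwen2.5-Taiwan-3B-Reason-GRPO",
        "description": "Qwen2.5-Taiwan model with 3 B parameters, Reason-GRPO fine-tuned",
    },
}

def load_model(name: str):
    """Resolve a display name to its Hub repo_id and load tokenizer + model.

    A commented-out entry no longer exists as a dict key, so selecting it
    raises KeyError instead of triggering a large download.
    """
    repo_id = MODELS[name]["repo_id"]
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(repo_id)
    return tokenizer, model

# Example:
# tokenizer, model = load_model("Qwen2.5-Taiwan-3B-Reason-GRPO")

One note on the design choice visible in the diff: disabling entries by commenting them out, rather than deleting them, keeps the curated repo IDs and descriptions in the file, so re-enabling a model later only requires removing the comment markers.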