Luigi committed (verified)
Commit 5e03586 · 1 Parent(s): c6e816c

Update app.py

Files changed (1):
  1. app.py +12 -12
app.py CHANGED
@@ -27,14 +27,14 @@ cancel_event = threading.Event()
 # ------------------------------
 MODELS = {
     # ~30.5B total parameters (MoE: 3.3B activated)
-    "Qwen3-30B-A3B-Thinking-2507-FP8": {
-        "repo_id": "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8",
-        "description": "FP8-quantized MoE model with 30.5B total parameters (3.3B activated), 128 experts (8 activated), 48 layers, and native 262,144-token context. Optimized for complex reasoning tasks with enhanced thinking capabilities in mathematics, coding, science, and agent benchmarks. Supports only thinking mode; includes automatic reasoning delimiters."
-    },
-    "Qwen3-30B-A3B-Instruct-2507-FP8": {
-        "repo_id": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8",
-        "description": "FP8-quantized instruct-tuned variant of Qwen3-30B-A3B (30.5B total params, 3.3B activated), featuring strong general capabilities in instruction following, tool usage, text generation, and 256K long-context understanding. Ideal for agentic and multi-turn dialogue applications."
-    },
+    # "Qwen3-30B-A3B-Thinking-2507-FP8": {
+    #     "repo_id": "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8",
+    #     "description": "FP8-quantized MoE model with 30.5B total parameters (3.3B activated), 128 experts (8 activated), 48 layers, and native 262,144-token context. Optimized for complex reasoning tasks with enhanced thinking capabilities in mathematics, coding, science, and agent benchmarks. Supports only thinking mode; includes automatic reasoning delimiters."
+    # },
+    # "Qwen3-30B-A3B-Instruct-2507-FP8": {
+    #     "repo_id": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8",
+    #     "description": "FP8-quantized instruct-tuned variant of Qwen3-30B-A3B (30.5B total params, 3.3B activated), featuring strong general capabilities in instruction following, tool usage, text generation, and 256K long-context understanding. Ideal for agentic and multi-turn dialogue applications."
+    # },
 
     # ~235B total parameters (MoE: 22B activated) — included for reference if added later
     # "Qwen3-235B-A22B-Thinking": { ... },
@@ -95,10 +95,10 @@ MODELS = {
     },
 
     # ~3B
-    "AI21-Jamba-Reasoning-3B": {
-        "repo_id": "ai21labs/AI21-Jamba-Reasoning-3B",
-        "description": "A compact 3B hybrid Transformer–Mamba reasoning model with 256K context length, strong intelligence benchmark scores (61% MMLU-Pro, 52% IFBench), and efficient inference suitable for edge and datacenter use. Outperforms Gemma-3 4B and Llama-3.2 3B despite smaller size."
-    },
+    # "AI21-Jamba-Reasoning-3B": {
+    #     "repo_id": "ai21labs/AI21-Jamba-Reasoning-3B",
+    #     "description": "A compact 3B hybrid Transformer–Mamba reasoning model with 256K context length, strong intelligence benchmark scores (61% MMLU-Pro, 52% IFBench), and efficient inference suitable for edge and datacenter use. Outperforms Gemma-3 4B and Llama-3.2 3B despite smaller size."
+    # },
     "Qwen2.5-Taiwan-3B-Reason-GRPO": {
         "repo_id": "benchang1110/Qwen2.5-Taiwan-3B-Reason-GRPO",
         "description": "Qwen2.5-Taiwan model with 3 B parameters, Reason-GRPO fine-tuned"