Spaces:
Paused
Paused
Peter Larnholt
committed on
Commit
·
a19cb21
1
Parent(s):
b4f2fa1
Add tool calling support with Hermes parser for Qwen 2.5
Browse files
app.py
CHANGED
|
@@ -26,7 +26,8 @@ VLLM_ARGS = [
|
|
| 26 |
"--max-model-len", "8192", # fits A10G 24GB
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
| 29 |
-
#
|
|
|
|
| 30 |
]
|
| 31 |
if "AWQ" in MODEL_ID.upper():
|
| 32 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|
|
|
|
| 26 |
"--max-model-len", "8192", # fits A10G 24GB
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
| 29 |
+
"--enable-auto-tool-choice", # Enable tool calling
|
| 30 |
+
"--tool-call-parser", "hermes", # Use Hermes parser format for Qwen
|
| 31 |
]
|
| 32 |
if "AWQ" in MODEL_ID.upper():
|
| 33 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|