Spaces:

plarnholt
/

excom-ai-demo

Paused

Peter Larnholt committed on Oct 9

Commit

a19cb21

1 Parent(s): b4f2fa1

Add tool calling support with Hermes parser for Qwen 2.5

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,7 +26,8 @@ VLLM_ARGS = [
     "--max-model-len", "8192",               # fits A10G 24GB
     "--gpu-memory-utilization", "0.90",
     "--trust-remote-code",
-    # Qwen 2.5 has native tool calling - no parser override needed
 ]
 if "AWQ" in MODEL_ID.upper():
     VLLM_ARGS += ["--quantization", "awq_marlin"]  # faster AWQ kernel if available

     "--max-model-len", "8192",               # fits A10G 24GB
     "--gpu-memory-utilization", "0.90",
     "--trust-remote-code",
+    "--enable-auto-tool-choice",             # Enable tool calling
+    "--tool-call-parser", "hermes",          # Use Hermes parser format for Qwen
 ]
 if "AWQ" in MODEL_ID.upper():
     VLLM_ARGS += ["--quantization", "awq_marlin"]  # faster AWQ kernel if available