Spaces:
Running
Running
adjust thread numbers
Browse files
app.py
CHANGED
@@ -13,9 +13,10 @@ hf_hub_download(
 def load_model():
     return Llama(
         model_path="models/qwen2.5-1.5b-instruct-q4_k_m.gguf",
-        n_ctx=
-        n_threads=
-
+        n_ctx=1024,
+        n_threads=2,
+        n_threads_batch=2,
+        n_batch=4,
         n_gpu_layers=0,
         use_mlock=False,
         use_mmap=True,