Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ from transformers import AutoTokenizer
|
|
| 2 |
from exllamav2 import (
|
| 3 |
ExLlamaV2,
|
| 4 |
ExLlamaV2Config,
|
| 5 |
-
|
| 6 |
ExLlamaV2Tokenizer
|
| 7 |
)
|
| 8 |
from exllamav2.generator import (
|
|
@@ -10,6 +10,11 @@ from exllamav2.generator import (
|
|
| 10 |
ExLlamaV2Sampler
|
| 11 |
)
|
| 12 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Configure model
|
| 15 |
model_dir = "TheBloke_Wizard-Vicuna-13B-GPTQ" # Path to downloaded model
|
|
@@ -21,7 +26,7 @@ config.prepare()
|
|
| 21 |
|
| 22 |
# Load model
|
| 23 |
model = ExLlamaV2(config)
|
| 24 |
-
cache =
|
| 25 |
model.load_autosplit(cache)
|
| 26 |
|
| 27 |
# Load tokenizer (HF-compatible)
|
|
|
|
| 2 |
from exllamav2 import (
|
| 3 |
ExLlamaV2,
|
| 4 |
ExLlamaV2Config,
|
| 5 |
+
ExLlamaV2Cache_CPU,
|
| 6 |
ExLlamaV2Tokenizer
|
| 7 |
)
|
| 8 |
from exllamav2.generator import (
|
|
|
|
| 10 |
ExLlamaV2Sampler
|
| 11 |
)
|
| 12 |
import torch
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
# disable CUDA
|
| 16 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Hides all CUDA devices from this process; only effective if set before CUDA is first initialized
|
| 17 |
+
os.environ["EXLLAMA_NO_CUDA"] = "1" # Intended to force CPU mode in ExLlamaV2 -- NOTE(review): confirm the library actually reads this variable
|
| 18 |
|
| 19 |
# Configure model
|
| 20 |
model_dir = "TheBloke_Wizard-Vicuna-13B-GPTQ" # Path to downloaded model
|
|
|
|
| 26 |
|
| 27 |
# Load model
|
| 28 |
model = ExLlamaV2(config)
|
| 29 |
+
cache = ExLlamaV2Cache_CPU(model)
|
| 30 |
model.load_autosplit(cache)
|
| 31 |
|
| 32 |
# Load tokenizer (HF-compatible)
|