desert
committed on
Commit
·
932195b
1
Parent(s):
ded9e09
init inference
Browse files
app.py
CHANGED
|
@@ -3,15 +3,27 @@ from llama_cpp import Llama
|
|
| 3 |
from huggingface_hub import hf_hub_download
|
| 4 |
|
| 5 |
# Model identifier from Hugging Face
|
| 6 |
-
|
| 7 |
|
| 8 |
# Download the GGUF file from Hugging Face
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Chat function
|
| 17 |
def chat_with_model(user_input, chat_history):
|
|
|
|
from huggingface_hub import hf_hub_download

# Model identifier from Hugging Face
adapter_repo = "Mat17892/lora_llama_gguf_g14"  # Hugging Face model ID

# Download the GGUF file from Hugging Face
lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename="llama_lora_adapter.gguf")

# Download the base model GGUF file
base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename="Llama-3.2-3B-Instruct-Q8_0.gguf")

# Load the base model with the LoRA adapter applied.
# FIX: llama_cpp.Llama has no `load_adapter()` method — calling it raised
# AttributeError. A GGUF LoRA adapter is applied by passing `lora_path`
# to the Llama constructor instead.
print("Loading base model...")
print("Applying LoRA adapter...")
llm = Llama(
    model_path=base_model_path,
    lora_path=lora_adapter_path,  # apply the adapter at load time
    n_ctx=2048,   # context window size (tokens)
    n_threads=8,  # CPU threads for inference
)

print("Model ready with LoRA adapter!")
|
| 28 |
# Chat function
|
| 29 |
def chat_with_model(user_input, chat_history):
|