Spaces:
Runtime error
Runtime error
unsloth Llama-3.2-1B
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ class ModelManager:
|
|
| 29 |
if HAS_UNSLOTH and self.device != "cpu":
|
| 30 |
# GPU via Unsloth + LoRA
|
| 31 |
backbone, tokenizer = FastLanguageModel.from_pretrained(
|
| 32 |
-
"…"  [removed model id truncated in page capture]
|
| 33 |
load_in_4bit=True,
|
| 34 |
dtype=torch.float16,
|
| 35 |
device_map="auto",
|
|
@@ -50,7 +50,7 @@ class ModelManager:
|
|
| 50 |
|
| 51 |
# --- Fallback: CPU-only via HF Transformers + PEFT ---
|
| 52 |
print("Falling back to CPU-only Transformers + PEFT")
|
| 53 |
-
base_name = "…"  [removed value truncated in page capture]
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained(base_name, use_fast=True)
|
| 55 |
base = AutoModelForCausalLM.from_pretrained(
|
| 56 |
base_name,
|
|
@@ -109,4 +109,4 @@ demo = gr.Interface(
|
|
| 109 |
)
|
| 110 |
|
| 111 |
if __name__ == "__main__":
|
| 112 |
-
demo.launch()
|
|
|
|
| 29 |
if HAS_UNSLOTH and self.device != "cpu":
|
| 30 |
# GPU via Unsloth + LoRA
|
| 31 |
backbone, tokenizer = FastLanguageModel.from_pretrained(
|
| 32 |
+
"unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
|
| 33 |
load_in_4bit=True,
|
| 34 |
dtype=torch.float16,
|
| 35 |
device_map="auto",
|
|
|
|
| 50 |
|
| 51 |
# --- Fallback: CPU-only via HF Transformers + PEFT ---
|
| 52 |
print("Falling back to CPU-only Transformers + PEFT")
|
| 53 |
+
base_name = "unsloth/Llama-3.2-1B-Instruct" # non-4bit to run on CPU
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained(base_name, use_fast=True)
|
| 55 |
base = AutoModelForCausalLM.from_pretrained(
|
| 56 |
base_name,
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
if __name__ == "__main__":
|
| 112 |
+
demo.launch(share=True)
|