helloperson123 committed (verified)
Commit defb45d · 1 parent: b57fe6b

Update app.py

Files changed (1): app.py +5 -8
app.py CHANGED
@@ -4,15 +4,14 @@ import torch
 
 app = Flask(__name__)
 
-# Load the Phi-3 model
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-print("🚀 Loading model... this may take a minute.")
+print("🚀 Loading Phi-3-mini model...")
 
+# Load model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
+    device_map="auto"  # works fine if accelerate is installed
 )
 
 print("✅ Model loaded successfully!")
@@ -26,8 +25,8 @@ def ask():
     data = request.get_json()
     prompt = data.get("prompt", "")
 
-    # System prompt to guide Phi-3 to act as a helpful assistant
-    full_prompt = f"<|system|>\nYou are Acla, a smart and friendly AI assistant. Be clear and concise.\n<|user|>\n{prompt}\n<|assistant|>"
+    # build prompt
+    full_prompt = f"<|system|>\nYou are Acla, a smart and helpful assistant.\n<|user|>\n{prompt}\n<|assistant|>"
 
     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
@@ -39,8 +38,6 @@ def ask():
     )
 
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Clean up: only return assistant's reply
     if "<|assistant|>" in response:
         response = response.split("<|assistant|>")[-1].strip()
 
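For reference, here is the model-loading section as it reads after this commit, reassembled from the new side of the first hunk. The import lines are inferred from the `@@ ... import torch` hunk context and from the `Flask`, `AutoTokenizer`, and `AutoModelForCausalLM` calls, so treat them as a sketch of the file header rather than its exact contents:

import torch  # inferred: appears in the hunk header context
from flask import Flask, request  # inferred from app = Flask(__name__) and request.get_json()
from transformers import AutoTokenizer, AutoModelForCausalLM

app = Flask(__name__)

MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
print("🚀 Loading Phi-3-mini model...")

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto"  # requires the accelerate package to be installed
)

One side effect worth knowing: with the explicit torch_dtype argument removed, from_pretrained falls back to its default dtype (float32 in most transformers releases unless torch_dtype="auto" is passed), so GPU memory use roughly doubles relative to the deleted float16 path.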
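And a minimal sketch of exercising the endpoint once the app is running. Only def ask(), request.get_json(), and the "prompt" key are visible in this diff, so the /ask route path, the host and port, and the response shape below are all assumptions:

import requests

# Hypothetical smoke test for the ask() endpoint.
# The /ask path and http://localhost:7860 address are assumptions;
# adjust to wherever the Flask app is actually served.
resp = requests.post(
    "http://localhost:7860/ask",
    json={"prompt": "Introduce yourself in one sentence."},
)
print(resp.status_code)
print(resp.text)  # the response format isn't shown in this diff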