helloperson123 committed (verified)
Commit defb45d · 1 parent: b57fe6b

Update app.py

Files changed (1): app.py +5 -8
app.py CHANGED
@@ -4,15 +4,14 @@ import torch
 
 app = Flask(__name__)
 
-# Load the Phi-3 model
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-print("🚀 Loading model... this may take a minute.")
+print("🚀 Loading Phi-3-mini model...")
 
+# Load model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
+    device_map="auto"  # works fine if accelerate is installed
 )
 
 print("✅ Model loaded successfully!")
@@ -26,8 +25,8 @@ def ask():
     data = request.get_json()
     prompt = data.get("prompt", "")
 
-    # System prompt to guide Phi-3 to act as a helpful assistant
-    full_prompt = f"<|system|>\nYou are Acla, a smart and friendly AI assistant. Be clear and concise.\n<|user|>\n{prompt}\n<|assistant|>"
+    # build prompt
+    full_prompt = f"<|system|>\nYou are Acla, a smart and helpful assistant.\n<|user|>\n{prompt}\n<|assistant|>"
 
     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
@@ -39,8 +38,6 @@ def ask():
     )
 
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Clean up: only return assistant's reply
     if "<|assistant|>" in response:
         response = response.split("<|assistant|>")[-1].strip()
 
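For reference, here is the model-loading section as it reads after this commit, reassembled from the new side of the first hunk. The import lines are inferred from the `@@ ... import torch` hunk context and from the `Flask`, `AutoTokenizer`, and `AutoModelForCausalLM` calls, so treat them as a sketch of the file header rather than its exact contents:

import torch  # inferred: appears in the hunk header context
from flask import Flask, request  # inferred from app = Flask(__name__) and request.get_json()
from transformers import AutoTokenizer, AutoModelForCausalLM

app = Flask(__name__)

MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
print("🚀 Loading Phi-3-mini model...")

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto"  # requires the accelerate package to be installed
)

One side effect worth knowing: with the explicit torch_dtype argument removed, from_pretrained falls back to its default dtype (float32 in most transformers releases unless torch_dtype="auto" is passed), so GPU memory use roughly doubles relative to the deleted float16 path.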
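And a minimal sketch of exercising the endpoint once the app is running. Only def ask(), request.get_json(), and the "prompt" key are visible in this diff, so the /ask route path, the host and port, and the response shape below are all assumptions:

import requests

# Hypothetical smoke test for the ask() endpoint.
# The /ask path and http://localhost:7860 address are assumptions;
# adjust to wherever the Flask app is actually served.
resp = requests.post(
    "http://localhost:7860/ask",
    json={"prompt": "Introduce yourself in one sentence."},
)
print(resp.status_code)
print(resp.text)  # the response format isn't shown in this diff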