helloperson123 committed
Commit c1a8aec · verified · 1 Parent(s): defb45d

Update app.py

Files changed (1)
  1. app.py +35 -20
app.py CHANGED
@@ -1,34 +1,35 @@
 from flask import Flask, request, jsonify
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import requests
 
 app = Flask(__name__)
 
-MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-print("🚀 Loading Phi-3-mini model...")
+# Allow CORS for everything (so TurboWarp can connect)
+from flask_cors import CORS
+CORS(app)
 
-# Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# === Load Phi model ===
+print("🚀 Loading Phi model... this may take a minute.")
+model_name = "microsoft/phi-2"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    device_map="auto"  # works fine if accelerate is installed
+    model_name,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True
 )
+model.to("cuda" if torch.cuda.is_available() else "cpu")
+print("✅ Model loaded!")
 
-print("✅ Model loaded successfully!")
-
-@app.route("/")
-def home():
-    return "<h2>🧠 Phi-3-mini API is running!</h2><p>POST JSON to <code>/api/ask</code> with {'prompt': 'your question'}</p>"
-
+# === Main API ===
 @app.route("/api/ask", methods=["POST"])
 def ask():
     data = request.get_json()
     prompt = data.get("prompt", "")
 
-    # build prompt
-    full_prompt = f"<|system|>\nYou are Acla, a smart and helpful assistant.\n<|user|>\n{prompt}\n<|assistant|>"
-
-    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+    chat_prompt = f"### Instruction:\nYou are Acla, a helpful AI powered by phi-3 mini that can reason about math, code, and logic.\n\n### Input:\n{prompt}\n\n### Response:"
+    inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
+
     outputs = model.generate(
         **inputs,
         max_new_tokens=300,
@@ -37,12 +38,26 @@ def ask():
         do_sample=True
     )
 
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    if "<|assistant|>" in response:
-        response = response.split("<|assistant|>")[-1].strip()
-
+    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = text.split("### Response:")[-1].strip()
     return jsonify({"reply": response})
 
 
+# === Proxy endpoint ===
+@app.route("/proxy", methods=["POST"])
+def proxy():
+    """Forward TurboWarp requests to /api/ask internally."""
+    try:
+        data = request.get_json()
+        r = requests.post("http://localhost:7860/api/ask", json=data)
+        return jsonify(r.json())
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+
+@app.route("/")
+def home():
+    return "🧠 Phi-2 Chatbot + Proxy running! Send POST to /proxy or /api/ask"
+
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
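
For reference, a minimal client-side sketch of exercising the two endpoints this commit defines. It assumes the server is reachable at localhost:7860 (matching the host and port configured above); the example prompt is hypothetical.

import requests

BASE = "http://localhost:7860"

# Direct call: POST a JSON body with a "prompt" key to /api/ask
r = requests.post(f"{BASE}/api/ask", json={"prompt": "What is 2 + 2?"})
print(r.json()["reply"])

# Same request through the CORS-enabled /proxy route (the path TurboWarp would use)
r = requests.post(f"{BASE}/proxy", json={"prompt": "What is 2 + 2?"})
print(r.json())

Either route returns JSON of the form {"reply": "..."}; /proxy wraps failures as {"error": "..."} with HTTP 500.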