helloperson123 committed on
Commit
19ec13e
·
verified ·
1 Parent(s): c1a8aec

Update app.py

Files changed (1)
app.py +6 -25
app.py CHANGED
@@ -1,16 +1,12 @@
 from flask import Flask, request, jsonify
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from flask_cors import CORS
 import torch
-import requests
 
 app = Flask(__name__)
-
-# Allow CORS for everything (so TurboWarp can connect)
-from flask_cors import CORS
 CORS(app)
 
-# === Load Phi model ===
-print("🚀 Loading Phi model... this may take a minute.")
+print("🚀 Loading Phi model (microsoft/phi-2)...")
 model_name = "microsoft/phi-2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
@@ -19,20 +15,18 @@ model = AutoModelForCausalLM.from_pretrained(
     low_cpu_mem_usage=True
 )
 model.to("cuda" if torch.cuda.is_available() else "cpu")
-print("✅ Model loaded!")
+print("✅ Phi model loaded!")
 
-# === Main API ===
 @app.route("/api/ask", methods=["POST"])
 def ask():
-    data = request.get_json()
+    data = request.get_json(force=True)
     prompt = data.get("prompt", "")
 
     chat_prompt = f"### Instruction:\nYou are Acla, a helpful AI powered by phi-3 mini that can reason about math, code, and logic.\n\n### Input:\n{prompt}\n\n### Response:"
     inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
-
     outputs = model.generate(
         **inputs,
-        max_new_tokens=300,
+        max_new_tokens=250,
         temperature=0.7,
         top_p=0.9,
         do_sample=True
@@ -42,22 +36,9 @@ def ask():
     response = text.split("### Response:")[-1].strip()
     return jsonify({"reply": response})
 
-
-# === Proxy endpoint ===
-@app.route("/proxy", methods=["POST"])
-def proxy():
-    """Forward TurboWarp requests to /api/ask internally."""
-    try:
-        data = request.get_json()
-        r = requests.post("http://localhost:7860/api/ask", json=data)
-        return jsonify(r.json())
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-
-
 @app.route("/")
 def home():
-    return "🧠 Phi-2 Chatbot + Proxy running! Send POST to /proxy or /api/ask"
+    return "🧠 Phi-2 chatbot is running! POST JSON to /api/ask with {'prompt': 'your question'}."
 
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
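
With the /proxy route gone, clients call /api/ask directly. A minimal client-side sketch, assuming the app is reachable at the default host/port from app.run; the prompt value is illustrative:

import requests  # client-side only; the server no longer imports requests

resp = requests.post(
    "http://localhost:7860/api/ask",
    json={"prompt": "What is 2 + 2?"},  # any prompt string; the server reads data["prompt"]
)
print(resp.json()["reply"])  # the JSON body has a single "reply" field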