File size: 1,862 Bytes
e08d99f
c1a8aec
44288b0
c1a8aec
525a45d
5850e2b
525a45d
c1a8aec
 
 
c68e2d9
c1a8aec
 
 
 
44288b0
c1a8aec
 
 
44288b0
c1a8aec
 
63cb412
c1a8aec
8786131
 
 
e08d99f
44288b0
c1a8aec
 
44288b0
 
 
 
8786131
44288b0
 
8786131
44288b0
c1a8aec
 
44288b0
 
5850e2b
c1a8aec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5850e2b
c68e2d9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import requests

app = Flask(__name__)

# Allow CORS for everything (so TurboWarp can connect)
from flask_cors import CORS
CORS(app)

# === Load Phi model ===
print("πŸš€ Loading Phi model... this may take a minute.")
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
print("βœ… Model loaded!")

# === Main API ===
@app.route("/api/ask", methods=["POST"])
def ask():
    """Generate a chat reply for the POSTed prompt.

    Expects a JSON body like ``{"prompt": "..."}`` and returns
    ``{"reply": "..."}``. A missing or malformed body is treated as an
    empty prompt instead of crashing: the bare ``request.get_json()``
    returns ``None`` on invalid JSON, which previously raised
    ``AttributeError`` on ``.get``.
    """
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "")

    # The model loaded by this file is microsoft/phi-2, so the persona text
    # now says Phi-2 (the original prompt incorrectly claimed "phi-3 mini").
    chat_prompt = (
        "### Instruction:\nYou are Acla, a helpful AI powered by Phi-2 "
        "that can reason about math, code, and logic."
        f"\n\n### Input:\n{prompt}\n\n### Response:"
    )
    inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)

    # inference_mode: generation needs no autograd graph — saves memory/time.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # phi-2's tokenizer defines no pad token, which makes generate()
            # emit a warning; fall back to EOS — TODO confirm against the
            # tokenizer config.
            pad_token_id=tokenizer.eos_token_id,
        )

    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; keep only what follows the
    # final "### Response:" marker.
    response = text.split("### Response:")[-1].strip()
    return jsonify({"reply": response})


# === Proxy endpoint ===
@app.route("/proxy", methods=["POST"])
def proxy():
    """Forward TurboWarp requests to /api/ask internally.

    Returns the upstream JSON on success, or ``{"error": ...}`` with
    HTTP 500 if the forwarded request fails for any reason.
    """
    try:
        data = request.get_json()
        # timeout added: without one, requests.post can block this worker
        # forever if the local endpoint wedges. Generation of up to 300
        # tokens can be slow, so the bound is generous.
        r = requests.post(
            "http://localhost:7860/api/ask",
            json=data,
            timeout=120,
        )
        return jsonify(r.json())
    except Exception as e:
        # Broad catch is deliberate at this boundary: any failure (network,
        # non-JSON upstream body, bad client JSON) becomes a JSON error.
        return jsonify({"error": str(e)}), 500


@app.route("/")
def home():
    """Plain-text landing page doubling as a liveness check."""
    banner = "🧠 Phi-2 Chatbot + Proxy running! Send POST to /proxy or /api/ask"
    return banner

if __name__ == "__main__":
    # Bind on all interfaces so external clients (e.g. TurboWarp) can reach
    # the server; port 7860 matches the URL hard-coded in proxy().
    app.run(host="0.0.0.0", port=7860)