from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
from flask_cors import CORS
import torch

app = Flask(__name__)
CORS(app)  # allow cross-origin requests so a browser front end can call the API
print("π Loading Phi model (microsoft/phi-2)...")
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # fp16 halves memory on GPU; fall back to fp32 on CPU, where half
    # precision is not reliably supported for generation
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
print("β
Phi model loaded!")
@app.route("/api/ask", methods=["POST"])
def ask():
data = request.get_json(force=True)
prompt = data.get("prompt", "")
chat_prompt = f"### Instruction:\nYou are Acla, a helpful AI powered by phi-3 mini that can reason about math, code, and logic.\n\n### Input:\n{prompt}\n\n### Response:"
inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
**inputs,
max_new_tokens=250,
temperature=0.7,
top_p=0.9,
do_sample=True
)
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
response = text.split("### Response:")[-1].strip()
return jsonify({"reply": response})
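
# Example request (a hypothetical invocation; the endpoint and payload shape
# match the handler above):
#   curl -X POST http://localhost:7860/api/ask \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Explain recursion in one sentence."}'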
@app.route("/")
def home():
return "π§ Phi-2 chatbot is running! POST JSON to /api/ask with {'prompt': 'your question'}."
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)
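
# Minimal Python client sketch (an assumption-laden example, not part of the
# app: it presumes the server above is running locally on port 7860 and that
# the third-party "requests" library is installed):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/ask",
#       json={"prompt": "What is the derivative of x**2?"},
#       timeout=120,  # generation can be slow, especially on CPU
#   )
#   print(resp.json()["reply"])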