from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
from flask_cors import CORS
import torch

app = Flask(__name__)
CORS(app)  # allow cross-origin requests so a browser front end can call the API
print("π Loading Phi model (microsoft/phi-2)...")
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # fp16 halves memory on GPU; fall back to fp32 on CPU, where half
    # precision is not reliably supported for generation
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
print("β
Phi model loaded!")
@app.route("/api/ask", methods=["POST"])
def ask():
data = request.get_json(force=True)
prompt = data.get("prompt", "")
chat_prompt = f"### Instruction:\nYou are Acla, a helpful AI powered by phi-3 mini that can reason about math, code, and logic.\n\n### Input:\n{prompt}\n\n### Response:"
inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
**inputs,
max_new_tokens=250,
temperature=0.7,
top_p=0.9,
do_sample=True
)
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
response = text.split("### Response:")[-1].strip()
return jsonify({"reply": response})
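
# Example request (a hypothetical invocation; the endpoint and payload shape
# match the handler above):
#   curl -X POST http://localhost:7860/api/ask \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Explain recursion in one sentence."}'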
@app.route("/")
def home():
return "π§ Phi-2 chatbot is running! POST JSON to /api/ask with {'prompt': 'your question'}."
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)
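
# Minimal Python client sketch (an assumption-laden example, not part of the
# app: it presumes the server above is running locally on port 7860 and that
# the third-party "requests" library is installed):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/ask",
#       json={"prompt": "What is the derivative of x**2?"},
#       timeout=120,  # generation can be slow, especially on CPU
#   )
#   print(resp.json()["reply"])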