helloperson123 committed on
Commit
4386b39
·
verified ·
1 Parent(s): c20d839

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -30
app.py CHANGED
@@ -1,43 +1,56 @@
 
1
# app.py
"""Flask API exposing a text-generation endpoint backed by a Hugging Face causal LM."""
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)
CORS(app)  # allow cross-origin requests from the web frontend

# ----------------------------
# Model setup
# ----------------------------
MODEL_NAME = "openaccess-ai/phi-3-mini"  # Public HF model
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model once at startup.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()  # inference-only service; disable dropout etc.

# ----------------------------
# System prompt
# ----------------------------
# NOTE(review): the original literal began with a stray "#n" (almost certainly
# a mangled "\n") and contained the typo "AI Aclaassistant"; both fixed here.
SYSTEM_PROMPT = """
You are Acla, an AI assistant. Be helpful, concise, and accurate.
Focus on math, reasoning, and code when relevant.
Always respond in a friendly and clear manner.
"""

# ----------------------------
# API endpoint
# ----------------------------
@app.route("/api/ask", methods=["POST"])
def ask():
    """Generate a reply for the posted prompt.

    Expects JSON body: {"prompt": "<text>"}.
    Returns JSON: {"reply": "<generated text>"} on success,
    or {"error": "<message>"} with HTTP 500 on failure.
    """
    try:
        # silent=True: missing/malformed JSON yields None instead of raising,
        # so an empty dict fallback keeps the endpoint from 500-ing on bad input.
        data = request.get_json(silent=True) or {}
        user_prompt = data.get("prompt", "")

        # Combine system prompt and user input
        full_prompt = SYSTEM_PROMPT + "\nUser: " + user_prompt + "\nAI:"

        inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():  # no autograd bookkeeping needed for generation
            outputs = model.generate(**inputs, max_new_tokens=150)

        # Decode only the newly generated tokens. The original stripped the
        # prompt with str.replace(full_prompt, ""), which silently fails
        # whenever decode() does not reproduce the prompt byte-for-byte
        # (tokenizer normalization); slicing by input length is reliable.
        prompt_len = inputs["input_ids"].shape[1]
        answer = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        return jsonify({"reply": answer})
    except Exception as e:
        # Top-level boundary: report the failure to the client as JSON.
        return jsonify({"error": str(e)}), 500

# ----------------------------
# Run app
# ----------------------------
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)