helloperson123 committed on
Commit
4386b39
·
verified ·
1 Parent(s): c20d839

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -30
app.py CHANGED
@@ -1,43 +1,56 @@
 
1
# app.py
"""Flask API exposing a text-generation endpoint backed by a Hugging Face causal LM."""
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)
CORS(app)  # allow cross-origin requests from the web frontend

# ----------------------------
# Model setup
# ----------------------------
MODEL_NAME = "openaccess-ai/phi-3-mini"  # Public HF model
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model once at startup.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()  # inference-only service; disable dropout etc.

# ----------------------------
# System prompt
# ----------------------------
# NOTE(review): the original literal began with a stray "#n" (almost certainly
# a mangled "\n") and contained the typo "AI Aclaassistant"; both fixed here.
SYSTEM_PROMPT = """
You are Acla, an AI assistant. Be helpful, concise, and accurate.
Focus on math, reasoning, and code when relevant.
Always respond in a friendly and clear manner.
"""

# ----------------------------
# API endpoint
# ----------------------------
@app.route("/api/ask", methods=["POST"])
def ask():
    """Generate a reply for the posted prompt.

    Expects JSON body: {"prompt": "<text>"}.
    Returns JSON: {"reply": "<generated text>"} on success,
    or {"error": "<message>"} with HTTP 500 on failure.
    """
    try:
        # silent=True: missing/malformed JSON yields None instead of raising,
        # so an empty dict fallback keeps the endpoint from 500-ing on bad input.
        data = request.get_json(silent=True) or {}
        user_prompt = data.get("prompt", "")

        # Combine system prompt and user input
        full_prompt = SYSTEM_PROMPT + "\nUser: " + user_prompt + "\nAI:"

        inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():  # no autograd bookkeeping needed for generation
            outputs = model.generate(**inputs, max_new_tokens=150)

        # Decode only the newly generated tokens. The original stripped the
        # prompt with str.replace(full_prompt, ""), which silently fails
        # whenever decode() does not reproduce the prompt byte-for-byte
        # (tokenizer normalization); slicing by input length is reliable.
        prompt_len = inputs["input_ids"].shape[1]
        answer = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        return jsonify({"reply": answer})
    except Exception as e:
        # Top-level boundary: report the failure to the client as JSON.
        return jsonify({"error": str(e)}), 500

# ----------------------------
# Run app
# ----------------------------
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)