aryo100 committed
Commit 53ee96a · 1 Parent(s): f8184cb

update app

Files changed (1): app.py +14 -10
app.py CHANGED
@@ -1,14 +1,11 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch, os
-import uvicorn
+import torch, os, uvicorn
 
-# --- Initialize FastAPI ---
-app = FastAPI(title="Qwen Chat API")
+app = FastAPI()
 
-# --- Load model & tokenizer ---
-model_name = "Qwen/Qwen-1_8B-Chat" # change to a model that fits in RAM
+model_name = "Qwen/Qwen-1_8B-Chat" # change according to model size
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
@@ -17,19 +14,26 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto" if torch.cuda.is_available() else "cpu"
 )
 
-# --- Request & Response schema ---
 class ChatRequest(BaseModel):
     prompt: str
     max_new_tokens: int = 128
 
 @app.post("/chat")
 def chat(req: ChatRequest):
-    inputs = tokenizer(req.prompt, return_tensors="pt").to(model.device)
+    # Format the conversation using Qwen's chat template
+    messages = [
+        {"role": "system", "content": "You are a helpful AI assistant."},
+        {"role": "user", "content": req.prompt},
+    ]
+
+    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    inputs = tokenizer(text, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=req.max_new_tokens)
     reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
     return {"reply": reply}
 
-# --- Entrypoint ---
 if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860)) # HF Spaces default port
+    port = int(os.environ.get("PORT", 7860))
     uvicorn.run("app:app", host="0.0.0.0", port=port)
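
One caveat worth noting about the updated handler: tokenizer.decode(outputs[0], skip_special_tokens=True) decodes the entire generated sequence, so the returned reply echoes the templated prompt along with the newly generated text. A minimal sketch of a variant that returns only the completion, reusing the inputs/outputs names from the diff above (an illustration, not part of this commit):

    # Sketch: decode only the tokens generated after the prompt.
    # Assumes `inputs`, `model`, `tokenizer`, and `req` as defined in app.py.
    prompt_len = inputs["input_ids"].shape[1]  # number of prompt tokens
    outputs = model.generate(**inputs, max_new_tokens=req.max_new_tokens)
    # Slice off the echoed prompt before decoding
    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)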
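Once the app is running, the endpoint can be exercised with a short client. A minimal sketch, assuming the requests library and a locally served app on the HF Spaces default port; the base URL and example prompt are placeholders, not part of this commit:

    import requests

    BASE_URL = "http://localhost:7860"  # replace with your Space's URL

    # POST a prompt to the /chat endpoint defined in app.py
    resp = requests.post(
        f"{BASE_URL}/chat",
        json={"prompt": "Hello, how are you?", "max_new_tokens": 64},
    )
    resp.raise_for_status()
    print(resp.json()["reply"])  # may echo the templated prompt, see note above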