aryo100 committed on
Commit
6f78bf3
·
1 Parent(s): ded5869

first commit

Browse files
Files changed (2) hide show
  1. app.py +28 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
app = FastAPI()

# Load Qwen-7B once at startup so every request reuses the same tokenizer and
# model instead of reloading the weights per call.
model_name = "Qwen/Qwen-7B"

# The Qwen/Qwen-7B repository ships its own tokenizer and model classes, so
# transformers must be allowed to import that code via trust_remote_code=True;
# without it loading stops and asks for confirmation instead of succeeding.
# NOTE(review): this executes Python code downloaded from the model repo --
# consider pinning a `revision=` that has been audited.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    trust_remote_code=True,
)
11
+
12
@app.get("/")
def home():
    """Return a fixed status payload showing that the API process is up."""
    payload = dict(status="ok", message="Qwen-7B API is running!")
    return payload
15
+
16
@app.post("/chat")
async def chat(prompt: str):
    """Generate a sampled completion for ``prompt`` with the loaded model.

    Args:
        prompt: Text to feed to the model. Declared as a plain ``str``
            parameter, so FastAPI reads it from the query string.

    Returns:
        A dict with a single ``"response"`` key holding the decoded text of
        the first generated sequence (special tokens stripped).

    NOTE(review): for decoder-only models the ``generate`` output normally
    starts with the prompt tokens, so the response likely echoes the prompt --
    confirm whether callers rely on that before trimming it.
    """
    # Local import keeps this block self-contained (asyncio is stdlib).
    import asyncio

    def _generate():
        """Run tokenization, generation and decoding synchronously."""
        inputs = tokenizer(prompt, return_tensors="pt")
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
            )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    # model.generate is blocking, CPU-bound work. Calling it directly inside an
    # ``async def`` handler would freeze the event loop (and every other
    # request, including "/") until generation finishes, so run it in the
    # default thread-pool executor and await the result instead.
    loop = asyncio.get_running_loop()
    text = await loop.run_in_executor(None, _generate)
    return {"response": text}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
transformers>=4.40.0
torch
fastapi
uvicorn
# Required by transformers when from_pretrained() is called with device_map=...
accelerate
# Imported by the custom tokenizer/model code shipped in the Qwen/Qwen-7B repo
tiktoken
einops
transformers_stream_generator