File size: 638 Bytes
f394a62
 
 
6247e60
f394a62
 
 
e53d910
1cadecd
e53d910
1cadecd
e53d910
 
39cc8a5
 
 
 
 
 
a5b1f33
c6cb00e
65f1222
e53d910
823c760
73fcf85
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from gptqmodel import GPTQModel
# Load the GPTQ checkpoint named below once, at import time; the request
# handling code in this module reuses this single instance.
# NOTE(review): this comment previously named
# Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4 from modelscope, which does not match
# the ID actually loaded here -- confirm which model is intended.
model = GPTQModel.load("TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ")

async def generate_response(input: str):
    """Generate a completion for ``input`` and return it as decoded text.

    The prompt is passed to the module-level ``model``; the first element of
    the generation result is decoded with the model's tokenizer.

    NOTE(review): ``model.generate`` is called synchronously here, so this
    coroutine does not yield to the event loop while generating -- confirm
    that this is acceptable for the expected load.
    """
    outputs = model.generate(input)
    first_sequence = outputs[0]
    tokenizer = model.tokenizer
    return tokenizer.decode(first_sequence)
    

##############################################

from fastapi import FastAPI
import uvicorn

# ASGI application object; the route decorators below register handlers on it
# and it is handed to uvicorn.run() when this file is executed as a script.
app = FastAPI()

# GET /: fixed greeting payload, usable as a simple "is the server up" probe.
@app.get("/")
def greet_json():
    greeting = {"Hello": "World!"}
    return greeting

# GET /message?input=<prompt>: run the prompt through the model and return the
# decoded completion as the response body.
@app.get("/message")
async def message(input: str):
    # generate_response is a coroutine function: calling it only creates a
    # coroutine object, so it must be awaited to obtain the generated text.
    # Returning the un-awaited coroutine would hand FastAPI an object it
    # cannot serialize instead of the text.
    return await generate_response(input)

if __name__ == "__main__":
    # Executed directly as a script: serve the app on all interfaces, port 7860.
    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)