meta / app.py
ajaykumarr's picture
Create app.py
72e4d27 verified
raw
history blame
725 Bytes
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI, Request
from pydantic import BaseModel
import uvicorn
# ASGI application object; the /generate route below is registered on it.
app = FastAPI()
# Checkpoint identifier handed to both from_pretrained() calls below.
model_name = "meta-llama/Meta-Llama-3.1-405B"
# Tokenizer and model are loaded once, at import time, and shared by every
# request handled by this module.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): no dtype or device placement is given here, so the library
# defaults apply — confirm the host can actually hold a checkpoint this large.
model = AutoModelForCausalLM.from_pretrained(model_name)
class Query(BaseModel):
    """Request body accepted by the ``/generate`` endpoint."""

    # Prompt text; the endpoint tokenizes it and feeds it to the model.
    inputs: str
# Upper bound on the number of tokens generated per request. Without an
# explicit limit, generate() falls back to the library's default *total*
# length (prompt + completion) of 20 tokens, so any realistic prompt gets
# little or no completion.
MAX_NEW_TOKENS = 256


@app.post("/generate")
async def generate(query: Query):
    """Generate text from ``query.inputs`` using the module-level model.

    Args:
        query: Parsed request body; ``query.inputs`` is the prompt text.

    Returns:
        A dict with the single key ``"generated_text"`` whose value is the
        first output sequence decoded with special tokens stripped.
    """
    # NOTE(review): model.generate() is a blocking call inside an async
    # endpoint, so the event loop is stalled for the duration of a request.
    # Left as-is to keep requests serialized; revisit if concurrency matters.

    # Move the encoded prompt to wherever the model lives so the call also
    # works when the model is not on the CPU.
    encoded = tokenizer(query.inputs, return_tensors="pt").to(model.device)

    # Pass the full encoding (input_ids + attention_mask) instead of only
    # input_ids, and bound the completion length explicitly.
    outputs = model.generate(**encoded, max_new_tokens=MAX_NEW_TOKENS)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": response}
if __name__ == "__main__":
    # Executing this file directly serves the app on all interfaces, port 8000.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )