# kooktaeeee / api_v1.py — Hugging Face file-page header (scrape residue, not code)
# taekooktea's picture
# Update api_v1.py
# 6f131fb verified
import time
import uuid

from fastapi import FastAPI
from pydantic import BaseModel

from config import MODEL_NAME, API_V1_PORT
from error_handler import handle_generation_error
from utils import generate_response, tokenizer
# FastAPI application object; the metadata below feeds the auto-generated
# OpenAPI/Swagger docs served at /docs.
app = FastAPI(
    version="1.0.0",
    title="Phi-3-mini 极速API(v1)",
    description="基于microsoft/Phi-3-mini-4k-instruct-ONNX,CPU环境最优适配",
)
class ChatCompletionRequest(BaseModel):
    # Request body for /v1/chat/completions, mirroring a subset of OpenAI's schema.
    model: str = MODEL_NAME  # defaults to the locally served model name from config
    # Chat turns; each dict presumably carries "role" and "content" keys
    # (the endpoint reads both) — confirm against actual clients.
    messages: list[dict]
    temperature: float = 0.1  # low default favors near-deterministic output
@app.post("/v1/chat/completions", summary="Chat对话接口(兼容OpenAI)")
async def chat_completion(request: ChatCompletionRequest):
    """OpenAI-compatible chat completion endpoint.

    Extracts the most recent ``"user"`` message from ``request.messages``,
    generates one assistant reply via ``generate_response``, and returns a
    payload shaped like OpenAI's /v1/chat/completions response, including
    token-usage accounting.

    On any failure the exception is delegated to ``handle_generation_error``
    (defined in error_handler; presumably raises/returns an HTTP error —
    TODO confirm).
    """
    # Pre-bind so the except-branch never hits UnboundLocalError when the
    # failure happens before the user message is extracted (original bug).
    user_input = None
    try:
        # Latest user turn wins; .get() tolerates messages missing a "role"
        # key instead of raising KeyError (original used msg["role"]).
        user_msg = next(
            (msg for msg in reversed(request.messages) if msg.get("role") == "user"),
            None,
        )
        if not user_msg:
            raise ValueError("未检测到用户输入消息")
        user_input = user_msg["content"]
        response_text = generate_response([user_input])[0]
        # Usage accounting; special tokens excluded so counts reflect raw text.
        prompt_tokens = len(tokenizer.encode(user_input, add_special_tokens=False))
        completion_tokens = len(tokenizer.encode(response_text, add_special_tokens=False))
        return {
            # BUG FIX: hash() returns an int, which is not subscriptable —
            # the original `hash(user_input)[:8]` raised TypeError on every
            # request. A random hex id matches OpenAI's opaque-id style.
            "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": request.model,
            "choices": [
                {
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop",
                    "index": 0,
                }
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }
    except Exception as e:
        # user_input is None when the failure occurred before extraction.
        handle_generation_error(e, user_input)
if __name__ == "__main__":
    # Imported lazily so merely importing this module never pulls in the server.
    import uvicorn

    # Single worker (workers=1): suits a CPU-only box serving one ONNX model;
    # warning-level logging keeps stdout quiet in normal operation.
    uvicorn.run(app, host="0.0.0.0", port=API_V1_PORT, workers=1, log_level="warning")