from fastapi import FastAPI
from pydantic import BaseModel
from utils import generate_response, tokenizer
from error_handler import handle_generation_error
from config import MODEL_NAME, API_V1_PORT
import time
import uuid
app = FastAPI(
    title="Phi-3-mini Fast API (v1)",
    description="Based on microsoft/Phi-3-mini-4k-instruct-ONNX, tuned for CPU environments",
    version="1.0.0"
)
class ChatCompletionRequest(BaseModel):
    model: str = MODEL_NAME
    messages: list[dict]
    temperature: float = 0.1
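# A minimal request-body sketch (illustrative only; "model" and "temperature"
# fall back to the defaults above when omitted):
#
#   {
#     "messages": [
#       {"role": "user", "content": "Summarize FastAPI in one sentence."}
#     ],
#     "temperature": 0.1
#   }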
@app.post("/v1/chat/completions", summary="Chat对话接口(兼容OpenAI)")
async def chat_completion(request: ChatCompletionRequest):
try:
user_msg = next((msg for msg in reversed(request.messages) if msg["role"] == "user"), None)
if not user_msg:
raise ValueError("未检测到用户输入消息")
user_input = user_msg["content"]
response_text = generate_response([user_input])[0]
prompt_tokens = len(tokenizer.encode(user_input, add_special_tokens=False))
completion_tokens = len(tokenizer.encode(response_text, add_special_tokens=False))
return {
"id": f"chatcmpl-{hash(user_input)[:8]}",
"object": "chat.completion",
"created": int(time.time()),
"model": request.model,
"choices": [
{
"message": {"role": "assistant", "content": response_text},
"finish_reason": "stop",
"index": 0
}
],
"usage": {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens
}
}
except Exception as e:
handle_generation_error(e, user_input)
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=API_V1_PORT,
        workers=1,
        log_level="warning"
    )
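# Smoke-test sketch (hypothetical client, not part of this service): assumes the
# server is already running locally on API_V1_PORT and that `requests` is installed.
#
#   import requests
#   resp = requests.post(
#       f"http://localhost:{API_V1_PORT}/v1/chat/completions",
#       json={"messages": [{"role": "user", "content": "Hello"}]},
#   )
#   print(resp.json()["choices"][0]["message"]["content"])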