# Phi-3-mini OpenAI-compatible chat-completion API service (v1).
# NOTE(review): removed non-code residue ("Spaces: / Running / Running" — hosting-page
# status text that was pasted into the source and is not valid Python).
import hashlib
import time

from fastapi import FastAPI
from pydantic import BaseModel

from config import MODEL_NAME, API_V1_PORT
from error_handler import handle_generation_error
from utils import generate_response, tokenizer
# OpenAPI metadata, surfaced in the auto-generated docs (/docs, /redoc).
_API_METADATA = {
    "title": "Phi-3-mini 极速API(v1)",
    "description": "基于microsoft/Phi-3-mini-4k-instruct-ONNX,CPU环境最优适配",
    "version": "1.0.0",
}

# Single FastAPI application instance for this service.
app = FastAPI(**_API_METADATA)
class ChatCompletionRequest(BaseModel):
    """Request body for the OpenAI-compatible chat-completion endpoint."""

    # Defaults to the locally configured model; callers may override it,
    # but the value is only echoed back in the response.
    model: str = MODEL_NAME
    # OpenAI-style message dicts, e.g. {"role": "user", "content": "..."}
    # — presumably; schema is not validated beyond "list of dict".
    messages: list[dict]
    # Low default temperature — accepted but not forwarded in this file;
    # TODO confirm whether generate_response reads it from config instead.
    temperature: float = 0.1
# NOTE(review): the original coroutine was never registered on the app —
# without a route decorator the service exposed no endpoint. The path
# follows the OpenAI convention; confirm against the intended client.
@app.post("/v1/chat/completions")
async def chat_completion(request: ChatCompletionRequest):
    """OpenAI-compatible chat completion endpoint.

    Extracts the most recent user message, generates a reply via the
    shared model helper, and returns an OpenAI-style payload including
    token-usage accounting.

    Raises:
        Whatever ``handle_generation_error`` raises on failure
        (presumably an HTTPException — TODO confirm in error_handler).
    """
    # Safe default so the except-handler never sees an unbound name when
    # the failure happens before the user message is extracted
    # (the original code raised NameError in that path).
    user_input = ""
    try:
        # Newest-first scan for the last user turn; .get() tolerates a
        # message dict that is missing the "role" key (was msg["role"],
        # which raised KeyError instead of the intended ValueError).
        user_msg = next(
            (msg for msg in reversed(request.messages) if msg.get("role") == "user"),
            None,
        )
        if not user_msg:
            raise ValueError("未检测到用户输入消息")
        user_input = user_msg["content"]

        # generate_response is batch-oriented; send a single prompt.
        response_text = generate_response([user_input])[0]

        # Usage accounting mirrors the OpenAI "usage" contract.
        prompt_tokens = len(tokenizer.encode(user_input, add_special_tokens=False))
        completion_tokens = len(tokenizer.encode(response_text, add_special_tokens=False))

        # BUG FIX: hash() returns an int, which is not subscriptable —
        # `hash(user_input)[:8]` raised TypeError on every successful
        # request. Use a stable hex digest instead (md5 is acceptable:
        # this is an identifier, not a security token).
        completion_id = hashlib.md5(user_input.encode("utf-8")).hexdigest()[:8]

        return {
            "id": f"chatcmpl-{completion_id}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": request.model,
            "choices": [
                {
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop",
                    "index": 0,
                }
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }
    except Exception as e:
        # Delegate all failures to the shared handler; it receives the
        # offending input for logging ("" if extraction never happened).
        handle_generation_error(e, user_input)
if __name__ == "__main__":
    # Direct launch for local/dev use; production would invoke uvicorn
    # externally. workers=1 keeps a single process — presumably because
    # the in-process model cannot be shared across workers (TODO confirm).
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=API_V1_PORT, workers=1, log_level="warning")