File size: 2,031 Bytes
318fc38
964a656
6f131fb
964a656
318fc38
6f131fb
964a656
318fc38
 
 
 
 
964a656
 
6f131fb
 
 
964a656
318fc38
964a656
 
318fc38
 
 
 
 
 
 
6f131fb
 
 
964a656
6f131fb
964a656
6f131fb
318fc38
964a656
 
318fc38
6f131fb
964a656
 
 
318fc38
6f131fb
 
 
 
964a656
 
 
 
 
 
318fc38
 
6f131fb
318fc38
6f131fb
 
318fc38
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import hashlib
import time

from fastapi import FastAPI
from pydantic import BaseModel

from config import MODEL_NAME, API_V1_PORT
from error_handler import handle_generation_error
from utils import generate_response, tokenizer

# Application metadata kept in one place so the FastAPI call stays compact.
_APP_METADATA = {
    "title": "Phi-3-mini 极速API(v1)",
    "description": "基于microsoft/Phi-3-mini-4k-instruct-ONNX,CPU环境最优适配",
    "version": "1.0.0",
}

# ASGI application instance served by uvicorn (see the __main__ guard below).
app = FastAPI(**_APP_METADATA)

class ChatCompletionRequest(BaseModel):
    """Request body for /v1/chat/completions (OpenAI-compatible subset)."""
    model: str = MODEL_NAME  # defaults to the configured model; echoed back in the response
    messages: list[dict]  # chat history; handler reads "role" and "content" keys from each entry
    temperature: float = 0.1  # sampling temperature; low default — NOTE(review): not currently passed to generate_response, confirm intent

@app.post("/v1/chat/completions", summary="Chat对话接口(兼容OpenAI)")
async def chat_completion(request: ChatCompletionRequest):
    """Handle an OpenAI-compatible chat completion request.

    Extracts the most recent user message from ``request.messages``, runs
    it through ``generate_response``, and returns an OpenAI-style payload
    (``choices`` + ``usage`` token counts).

    Raises:
        Whatever ``handle_generation_error`` raises/returns on failure —
        all exceptions are funneled through it.
    """
    # Pre-bind so the except handler can safely reference user_input even
    # when the failure happens before a user message was extracted
    # (the original code hit NameError in that path).
    user_input = ""
    try:
        # Walk the history newest-first to pick the latest user turn.
        user_msg = next(
            (msg for msg in reversed(request.messages) if msg["role"] == "user"),
            None,
        )
        if not user_msg:
            raise ValueError("未检测到用户输入消息")
        user_input = user_msg["content"]

        response_text = generate_response([user_input])[0]

        # Usage accounting; special tokens excluded so counts reflect
        # only the visible text.
        prompt_tokens = len(tokenizer.encode(user_input, add_special_tokens=False))
        completion_tokens = len(tokenizer.encode(response_text, add_special_tokens=False))

        # BUG FIX: hash() returns an int, which is not subscriptable — the
        # original f"chatcmpl-{hash(user_input)[:8]}" raised TypeError on
        # every successful request. Use a stable hex-digest prefix instead.
        completion_id = f"chatcmpl-{hashlib.md5(user_input.encode('utf-8')).hexdigest()[:8]}"

        return {
            "id": completion_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": request.model,
            "choices": [
                {
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop",
                    "index": 0
                }
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens
            }
        }
    except Exception as e:
        # Broad catch is deliberate: this is the endpoint's top-level
        # boundary; the project's shared handler decides the HTTP response.
        handle_generation_error(e, user_input)

if __name__ == "__main__":
    # Script entry point: serve the API directly with uvicorn.
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": API_V1_PORT,
        "workers": 1,  # single worker, as configured for this deployment
        "log_level": "warning",
    }
    uvicorn.run(app, **server_options)