Commit 702e569 committed by root
1 Parent(s): d1aeed2
Auto-deploy: 2025-10-15 16:07:02
Files changed:
- app.py +17 -4
- requirements.txt +2 -0
app.py
CHANGED
@@ -2,6 +2,8 @@ from fastapi import FastAPI, HTTPException
 from fastapi.responses import HTMLResponse, JSONResponse
 from pydantic import BaseModel
 import os
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
 app = FastAPI(
     title="Sheikh LLM API",
@@ -9,6 +11,10 @@ app = FastAPI(
     version="1.0.0"
 )
 
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
+model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
+
 class ChatRequest(BaseModel):
     message: str
     max_tokens: int = 100
@@ -107,11 +113,18 @@ async def api_status():
 
 @app.post("/api/chat", response_model=ChatResponse)
 async def chat_endpoint(request: ChatRequest):
-    """
+    """Chat endpoint that uses a Hugging Face model"""
     if not request.message.strip():
         raise HTTPException(status_code=400, detail="Message cannot be empty")
-
-
+
+    # Encode the new user input, add the eos_token and return a tensor in Pytorch
+    new_user_input_ids = tokenizer.encode(request.message + tokenizer.eos_token, return_tensors='pt')
+
+    # Generate a response
+    chat_history_ids = model.generate(new_user_input_ids, max_length=request.max_tokens, pad_token_id=tokenizer.eos_token_id)
+
+    # Decode the response
+    response_text = tokenizer.decode(chat_history_ids[:, new_user_input_ids.shape[-1]:][0], skip_special_tokens=True)
 
     return ChatResponse(
         response=response_text,
@@ -120,4 +133,4 @@ async def chat_endpoint(request: ChatRequest):
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
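The new endpoint can be exercised without starting uvicorn by using FastAPI's TestClient. The snippet below is only a sketch: it assumes the app.py from this commit is importable as a module, that the DialoGPT-medium download succeeds at import time, and that httpx (needed by TestClient) is available. Note also that max_tokens is passed to model.generate as max_length, so it bounds prompt plus reply tokens, not the reply alone.

# Minimal sketch: exercise the new /api/chat endpoint with FastAPI's TestClient.
from fastapi.testclient import TestClient

from app import app  # the FastAPI instance defined in app.py

client = TestClient(app)

# An all-whitespace message is rejected by the new validation branch
assert client.post("/api/chat", json={"message": "   "}).status_code == 400

# A normal request returns a ChatResponse whose "response" field holds the generated text
resp = client.post("/api/chat", json={"message": "Hello, how are you?", "max_tokens": 64})
print(resp.status_code, resp.json()["response"])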
requirements.txt
CHANGED
@@ -2,3 +2,5 @@ fastapi==0.104.1
 uvicorn[standard]==0.24.0
 pydantic==2.5.0
 python-multipart==0.0.6
+transformers
+torch