root committed on commit 702e569 · 1 Parent(s): d1aeed2

Auto-deploy: 2025-10-15 16:07:02

Files changed (2):
  1. app.py +17 -4
  2. requirements.txt +2 -0
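
In short, this deploy swaps the echo-only /api/chat handler in app.py for real text generation with microsoft/DialoGPT-medium and adds the transformers and torch dependencies it needs.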
app.py CHANGED
@@ -2,6 +2,8 @@ from fastapi import FastAPI, HTTPException
 from fastapi.responses import HTMLResponse, JSONResponse
 from pydantic import BaseModel
 import os
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
 app = FastAPI(
     title="Sheikh LLM API",
@@ -9,6 +11,10 @@ app = FastAPI(
     version="1.0.0"
 )
 
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
+model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
+
 class ChatRequest(BaseModel):
     message: str
     max_tokens: int = 100
@@ -107,11 +113,18 @@ async def api_status():
 
 @app.post("/api/chat", response_model=ChatResponse)
 async def chat_endpoint(request: ChatRequest):
-    """Simple chat endpoint that echoes the message"""
+    """Chat endpoint that uses a Hugging Face model"""
     if not request.message.strip():
         raise HTTPException(status_code=400, detail="Message cannot be empty")
-
-    response_text = f"Received your message: '{request.message}'. This is from Sheikh LLM API!"
+
+    # Encode the new user input, add the eos_token and return a tensor in Pytorch
+    new_user_input_ids = tokenizer.encode(request.message + tokenizer.eos_token, return_tensors='pt')
+
+    # Generate a response
+    chat_history_ids = model.generate(new_user_input_ids, max_length=request.max_tokens, pad_token_id=tokenizer.eos_token_id)
+
+    # Decode the response
+    response_text = tokenizer.decode(chat_history_ids[:, new_user_input_ids.shape[-1]:][0], skip_special_tokens=True)
 
     return ChatResponse(
         response=response_text,
@@ -120,4 +133,4 @@ async def chat_endpoint(request: ChatRequest):
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
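
For reference, once the Space is up, the updated /api/chat endpoint could be exercised with a small client along the lines of the sketch below. This is only an illustration: it assumes the app is reachable on localhost:7860 (the port used in app.py) and that the requests package is installed, neither of which is part of this commit.

# Hypothetical client for the /api/chat endpoint changed above; assumes the
# server is running locally on port 7860 and `requests` is installed.
import requests

resp = requests.post(
    "http://localhost:7860/api/chat",
    json={"message": "Hello, how are you?", "max_tokens": 100},
)
resp.raise_for_status()
# ChatResponse carries the generated text in its `response` field
print(resp.json()["response"])

Note that the model and tokenizer are now loaded at module import, so the first start after this deploy also has to download (or load from cache) the DialoGPT-medium weights before the endpoint can answer.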
requirements.txt CHANGED
@@ -2,3 +2,5 @@ fastapi==0.104.1
 uvicorn[standard]==0.24.0
 pydantic==2.5.0
 python-multipart==0.0.6
+transformers
+torch
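
Unlike the existing entries, the two new dependencies are left unpinned. If the deploy needs to stay reproducible, they could be pinned in the same style as the rest of the file; the versions below are placeholders, not ones verified against this commit.

transformers==4.35.2   # placeholder version, pin to whatever was actually tested
torch==2.1.0           # placeholder version

Without pins, each redeploy installs whatever the latest transformers and torch releases happen to be.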