change to sync
main.py
CHANGED
@@ -154,63 +154,49 @@ class ChatRequest(BaseModel):
     user_id: str = Field(..., description="Unique identifier for the user")
     enable_followup: bool = Field(default=False, description="Flag to enable follow-up questions")
 
-
+def get_api_key(x_api_key: str = Header(...)) -> str:
     if x_api_key != CHAT_AUTH_KEY:
         raise HTTPException(status_code=403, detail="Invalid API key")
     return x_api_key
 
-
+def stream_llm_request(api_key: str, llm_request: dict, endpoint_url: str):
     """
-    Make a streaming request to the LLM service.
+    Make a streaming request to the LLM service using requests.
    """
     try:
-
-
-
-
-
-
-
-
-
-
-
-                if
-                    raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
-
-                async for chunk in response.aiter_text():
+        headers = {
+            "accept": "text/event-stream",
+            "X-API-Key": api_key,
+            "Content-Type": "application/json"
+        }
+
+        with requests.post(endpoint_url, headers=headers, json=llm_request, stream=True) as response:
+            if response.status_code != 200:
+                raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
+
+            for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
+                if chunk:
                     yield chunk
-    except
+    except requests.RequestException as e:
         logger.error(f"HTTP error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"HTTP error occurred while making LLM request: {str(e)}")
     except Exception as e:
         logger.error(f"Unexpected error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Unexpected error occurred while making LLM request: {str(e)}")
 
-
 @app.post("/chat/", response_class=StreamingResponse, tags=["Chat"])
 async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key: str = Depends(get_api_key)):
-    """
-    Chat endpoint that uses embeddings search and LLM for response generation.
-    """
     try:
-        # Load embeddings for the specified index
         document_list = load_embeddings(request.index_id)
-
-        # Perform embeddings search
-        search_results = embeddings.search(request.query, 5) # Get top 5 relevant results
+        search_results = embeddings.search(request.query, 5)
         context = "\n".join([document_list[idx[0]] for idx in search_results])
 
-        # Create RAG prompt
         rag_prompt = f"Based on the following context, please answer the user's question:\n\nContext:\n{context}\n\nUser's question: {request.query}\n\nAnswer:"
         system_prompt = "You are a helpful assistant tasked with providing answers using the context provided"
 
-        # Generate conversation_id if not provided
         conversation_id = request.conversation_id or str(uuid.uuid4())
 
         if request.enable_followup:
-            # Prepare the request for the LLM service
-            pass
             llm_request = {
                 "query": rag_prompt,
                 "model_id": 'openai/gpt-4o-mini',
@@ -218,7 +204,6 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
                 "user_id": request.user_id
             }
             endpoint_url = "https://pvanand-general-chat.hf.space/v2/followup-agent"
-
         else:
             llm_request = {
                 "prompt": rag_prompt,
@@ -230,17 +215,13 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
             endpoint_url = "https://pvanand-audio-chat.hf.space/llm-agent"
 
         logger.info(f"Starting chat response generation for user: {request.user_id} Full request: {llm_request}")
-
+
+        def response_generator():
             full_response = ""
-
+            for chunk in stream_llm_request(api_key, llm_request, endpoint_url):
                 full_response += chunk
                 yield chunk
-            logger.info(f"Finished chat response generation for user: {request.user_id} Full response{full_response}")
-
-            # Here you might want to add logic to save the conversation or perform other background tasks
-            # For example:
-            # background_tasks.add_task(save_conversation, request.user_id, conversation_id, request.query, full_response)
-
+            logger.info(f"Finished chat response generation for user: {request.user_id} Full response: {full_response}")
 
         return StreamingResponse(response_generator(), media_type="text/event-stream")
 
@@ -249,6 +230,7 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
         raise HTTPException(status_code=500, detail=f"Error in chat endpoint: {str(e)}")
 
 
+
 @app.on_event("startup")
 async def startup_event():
     check_and_index_csv_files()
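For context, a minimal client-side sketch of how the reworked /chat/ endpoint could be consumed after this change. This is an illustration only: the base URL and the example field values are assumptions, any ChatRequest fields beyond the ones visible in this diff (query, index_id, user_id, conversation_id, enable_followup) are not shown here, and the X-API-Key header simply mirrors the get_api_key dependency above.

import requests

BASE_URL = "http://localhost:8000"  # assumed host, for illustration only; not part of this commit

payload = {
    "query": "What does the indexed data say about pricing?",  # example value
    "index_id": "my-index",                                    # example value
    "user_id": "user-123",                                     # example value
    "enable_followup": False,  # False routes the request to the /llm-agent endpoint
    # conversation_id is omitted; the server falls back to str(uuid.uuid4())
}

headers = {
    "X-API-Key": "<CHAT_AUTH_KEY>",  # checked by get_api_key()
    "accept": "text/event-stream",
}

# The endpoint returns a StreamingResponse (text/event-stream), so read it
# incrementally, the same way stream_llm_request() reads the upstream service.
with requests.post(f"{BASE_URL}/chat/", json=payload, headers=headers, stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        if chunk:
            print(chunk, end="", flush=True)

One note on the "change to sync": stream_llm_request and response_generator are now plain synchronous generators, which StreamingResponse accepts directly (Starlette iterates sync generators in a threadpool), so the endpoint no longer needs an async HTTP client for the upstream LLM call.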