change to sync
main.py
CHANGED
@@ -154,63 +154,49 @@ class ChatRequest(BaseModel):
     user_id: str = Field(..., description="Unique identifier for the user")
     enable_followup: bool = Field(default=False, description="Flag to enable follow-up questions")
 
-
+def get_api_key(x_api_key: str = Header(...)) -> str:
     if x_api_key != CHAT_AUTH_KEY:
         raise HTTPException(status_code=403, detail="Invalid API key")
     return x_api_key
 
-
+def stream_llm_request(api_key: str, llm_request: dict, endpoint_url: str):
     """
-    Make a streaming request to the LLM service.
+    Make a streaming request to the LLM service using requests.
    """
     try:
-
-
-
-
-
-
-
-
-
-
-
-                if
-                    raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
-
-                async for chunk in response.aiter_text():
+        headers = {
+            "accept": "text/event-stream",
+            "X-API-Key": api_key,
+            "Content-Type": "application/json"
+        }
+
+        with requests.post(endpoint_url, headers=headers, json=llm_request, stream=True) as response:
+            if response.status_code != 200:
+                raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
+
+            for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
+                if chunk:
                     yield chunk
-    except
+    except requests.RequestException as e:
         logger.error(f"HTTP error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"HTTP error occurred while making LLM request: {str(e)}")
     except Exception as e:
         logger.error(f"Unexpected error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Unexpected error occurred while making LLM request: {str(e)}")
 
-
 @app.post("/chat/", response_class=StreamingResponse, tags=["Chat"])
 async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key: str = Depends(get_api_key)):
-    """
-    Chat endpoint that uses embeddings search and LLM for response generation.
-    """
     try:
-        # Load embeddings for the specified index
         document_list = load_embeddings(request.index_id)
-
-        # Perform embeddings search
-        search_results = embeddings.search(request.query, 5) # Get top 5 relevant results
+        search_results = embeddings.search(request.query, 5)
         context = "\n".join([document_list[idx[0]] for idx in search_results])
 
-        # Create RAG prompt
         rag_prompt = f"Based on the following context, please answer the user's question:\n\nContext:\n{context}\n\nUser's question: {request.query}\n\nAnswer:"
         system_prompt = "You are a helpful assistant tasked with providing answers using the context provided"
 
-        # Generate conversation_id if not provided
         conversation_id = request.conversation_id or str(uuid.uuid4())
 
         if request.enable_followup:
-            # Prepare the request for the LLM service
-            pass
             llm_request = {
                 "query": rag_prompt,
                 "model_id": 'openai/gpt-4o-mini',
@@ -218,7 +204,6 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
                 "user_id": request.user_id
             }
             endpoint_url = "https://pvanand-general-chat.hf.space/v2/followup-agent"
-
         else:
             llm_request = {
                 "prompt": rag_prompt,
@@ -230,17 +215,13 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
             endpoint_url = "https://pvanand-audio-chat.hf.space/llm-agent"
 
         logger.info(f"Starting chat response generation for user: {request.user_id} Full request: {llm_request}")
-
+
+        def response_generator():
             full_response = ""
-
+            for chunk in stream_llm_request(api_key, llm_request, endpoint_url):
                 full_response += chunk
                 yield chunk
-            logger.info(f"Finished chat response generation for user: {request.user_id} Full response{full_response}")
-
-            # Here you might want to add logic to save the conversation or perform other background tasks
-            # For example:
-            # background_tasks.add_task(save_conversation, request.user_id, conversation_id, request.query, full_response)
-
+            logger.info(f"Finished chat response generation for user: {request.user_id} Full response: {full_response}")
 
         return StreamingResponse(response_generator(), media_type="text/event-stream")
 
@@ -249,6 +230,7 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
         raise HTTPException(status_code=500, detail=f"Error in chat endpoint: {str(e)}")
 
 
+
 @app.on_event("startup")
 async def startup_event():
     check_and_index_csv_files()
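For context, a minimal client-side sketch of how the reworked /chat/ endpoint could be consumed after this change. This is an illustration only: the base URL and the example field values are assumptions, any ChatRequest fields beyond the ones visible in this diff (query, index_id, user_id, conversation_id, enable_followup) are not shown here, and the X-API-Key header simply mirrors the get_api_key dependency above.

import requests

BASE_URL = "http://localhost:8000"  # assumed host, for illustration only; not part of this commit

payload = {
    "query": "What does the indexed data say about pricing?",  # example value
    "index_id": "my-index",                                    # example value
    "user_id": "user-123",                                     # example value
    "enable_followup": False,  # False routes the request to the /llm-agent endpoint
    # conversation_id is omitted; the server falls back to str(uuid.uuid4())
}

headers = {
    "X-API-Key": "<CHAT_AUTH_KEY>",  # checked by get_api_key()
    "accept": "text/event-stream",
}

# The endpoint returns a StreamingResponse (text/event-stream), so read it
# incrementally, the same way stream_llm_request() reads the upstream service.
with requests.post(f"{BASE_URL}/chat/", json=payload, headers=headers, stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        if chunk:
            print(chunk, end="", flush=True)

One note on the "change to sync": stream_llm_request and response_generator are now plain synchronous generators, which StreamingResponse accepts directly (Starlette iterates sync generators in a threadpool), so the endpoint no longer needs an async HTTP client for the upstream LLM call.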