Update main.py
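This commit moves the token-limit failure out of coding_assistant's history-trimming loop and into chat_with_llama_stream itself: when retrying with a shrinking max_llm_history bottoms out below 2, the stream helper now raises HTTP 400 directly. The return full_response after the post-stream history update is removed as well.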
main.py CHANGED
@@ -77,7 +77,8 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_histor
     else:
         max_llm_history -= 1
         if max_llm_history < 2:
-
+            error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
+            raise HTTPException(status_code=400, detail=error_message)
 
     try:
         response = await or_client.chat.completions.create(
@@ -96,7 +97,6 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_histor
 
         # After streaming, add the full response to the conversation history
         messages.append({"role": "assistant", "content": full_response})
-        return full_response
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 
@@ -175,12 +175,6 @@ async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
 
     # Limit tokens in the conversation history
     limited_conversation = conversations[query.conversation_id]
-    while calculate_tokens(limited_conversation) > 8000:
-        if len(limited_conversation) > 2:  # Keep at least the system message and the latest user message
-            limited_conversation.pop(1)
-        else:
-            error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
-            raise HTTPException(status_code=400, detail=error_message)
 
     async def process_response():
         full_response = ""
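For context, a minimal sketch of how the relocated guard plausibly fits into chat_with_llama_stream. Only the guard with its error message, the or_client.chat.completions.create call, and the post-stream messages.append come from the diff; the retry loop, the history-slicing scheme, and the client setup are assumptions for illustration.

# Hypothetical reconstruction: the retry loop, history slicing, and client
# configuration are assumed; the guard and error message are from the diff.
from fastapi import HTTPException
from openai import AsyncOpenAI

or_client = AsyncOpenAI()  # client name as in the diff; credentials assumed

async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=10):
    while True:
        if max_llm_history < 2:
            # Guard added by this commit: fail fast instead of trimming forever.
            error_message = ("Token limit exceeded. Please shorten your input "
                             "or start a new conversation.")
            raise HTTPException(status_code=400, detail=error_message)
        try:
            # Keep the system prompt plus the most recent turns (assumed scheme).
            window = [messages[0]] + messages[1:][-max_llm_history:]
            response = await or_client.chat.completions.create(
                model=model, messages=window, stream=True
            )
            break
        except Exception:
            # Assumed: treat a failed call as context overflow and retry
            # with a smaller history window.
            max_llm_history -= 1

    full_response = ""
    async for chunk in response:
        delta = chunk.choices[0].delta.content or ""
        full_response += delta
        yield delta  # stream tokens to the caller as they arrive

    # After streaming, add the full response to the conversation history
    messages.append({"role": "assistant", "content": full_response})

With the guard living in the shared stream helper rather than in coding_assistant's removed while loop, any endpoint that calls the helper gets the same 400 once the window can no longer shrink.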