Update main.py
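This commit moves the token-limit failure out of coding_assistant's history-trimming loop and into chat_with_llama_stream itself: when retrying with a shrinking max_llm_history bottoms out below 2, the stream helper now raises HTTP 400 directly. The return full_response after the post-stream history update is removed as well.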
main.py CHANGED
@@ -77,7 +77,8 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_histor
     else:
         max_llm_history -= 1
         if max_llm_history < 2:
-
+            error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
+            raise HTTPException(status_code=400, detail=error_message)
 
     try:
         response = await or_client.chat.completions.create(
@@ -96,7 +97,6 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_histor
 
         # After streaming, add the full response to the conversation history
         messages.append({"role": "assistant", "content": full_response})
-        return full_response
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 
@@ -175,12 +175,6 @@ async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
 
     # Limit tokens in the conversation history
     limited_conversation = conversations[query.conversation_id]
-    while calculate_tokens(limited_conversation) > 8000:
-        if len(limited_conversation) > 2:  # Keep at least the system message and the latest user message
-            limited_conversation.pop(1)
-        else:
-            error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
-            raise HTTPException(status_code=400, detail=error_message)
 
     async def process_response():
         full_response = ""
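For context, a minimal sketch of how the relocated guard plausibly fits into chat_with_llama_stream. Only the guard with its error message, the or_client.chat.completions.create call, and the post-stream messages.append come from the diff; the retry loop, the history-slicing scheme, and the client setup are assumptions for illustration.

# Hypothetical reconstruction: the retry loop, history slicing, and client
# configuration are assumed; the guard and error message are from the diff.
from fastapi import HTTPException
from openai import AsyncOpenAI

or_client = AsyncOpenAI()  # client name as in the diff; credentials assumed

async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=10):
    while True:
        if max_llm_history < 2:
            # Guard added by this commit: fail fast instead of trimming forever.
            error_message = ("Token limit exceeded. Please shorten your input "
                             "or start a new conversation.")
            raise HTTPException(status_code=400, detail=error_message)
        try:
            # Keep the system prompt plus the most recent turns (assumed scheme).
            window = [messages[0]] + messages[1:][-max_llm_history:]
            response = await or_client.chat.completions.create(
                model=model, messages=window, stream=True
            )
            break
        except Exception:
            # Assumed: treat a failed call as context overflow and retry
            # with a smaller history window.
            max_llm_history -= 1

    full_response = ""
    async for chunk in response:
        delta = chunk.choices[0].delta.content or ""
        full_response += delta
        yield delta  # stream tokens to the caller as they arrive

    # After streaming, add the full response to the conversation history
    messages.append({"role": "assistant", "content": full_response})

With the guard living in the shared stream helper rather than in coding_assistant's removed while loop, any endpoint that calls the helper gets the same 400 once the window can no longer shrink.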