update format
main.py CHANGED
@@ -593,12 +593,16 @@ async def followup_agent(query: FollowupQueryModel, background_tasks: Background
     # Limit tokens in the conversation history
     limited_conversation = conversations[query.conversation_id]
 
+
     def process_response():
+        yield "<followup-response>"
         full_response = ""
         for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
             full_response += content
             yield content
-
+
+        yield "</followup-response>"
+
         logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
         response_content, interact,tools = parse_followup_and_tools(full_response)
 
@@ -606,7 +610,9 @@ async def followup_agent(query: FollowupQueryModel, background_tasks: Background
             "clarification": interact
         }
 
-        yield "<json>"
+        yield "<followp-json>"
+        yield json.dumps(result)
+        yield "</followp-json>"
 
         # Add the assistant's response to the conversation history
        conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
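With this change the endpoint streams the raw model text wrapped in <followup-response>...</followup-response> and then a JSON payload wrapped in <followp-json>...</followp-json> (tag names as they appear in the diff). The sketch below is not part of the commit; it shows one way a client could split such a stream under those assumptions. The function name parse_followup_stream and the buffer-then-split approach are hypothetical, for illustration only.

# Hypothetical client-side helper for the stream format introduced above.
# Assumes the full response fits in memory and the two tag pairs each
# appear at most once, as produced by the generator in the diff.
import json
import re


def parse_followup_stream(chunks):
    """Collect streamed chunks and split them into (text, payload)."""
    buffered = "".join(chunks)

    text_match = re.search(
        r"<followup-response>(.*?)</followup-response>", buffered, re.DOTALL
    )
    json_match = re.search(
        r"<followp-json>(.*?)</followp-json>", buffered, re.DOTALL
    )

    text = text_match.group(1).strip() if text_match else buffered
    payload = json.loads(json_match.group(1)) if json_match else None
    return text, payload


# Example usage with a fabricated stream:
chunks = [
    "<followup-response>", "Could you clarify ", "your region?",
    "</followup-response>", "<followp-json>",
    '{"clarification": "region"}', "</followp-json>",
]
text, payload = parse_followup_stream(chunks)
print(text)     # Could you clarify your region?
print(payload)  # {'clarification': 'region'}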