fix role disorder error in history
app.py CHANGED

@@ -216,14 +216,16 @@ if user_input:
     if st.session_state.pending_response:
         st.warning("Please wait for the assistant to finish responding.")
     else:
-        #
-        st.session_state.chat_history.append({"role": "user", "content": user_input})
+        # Display the raw user input immediately in the chat view.
         with st.chat_message("user"):
             st.markdown(user_input)
-
+
+        # Append the plain user message to chat history for display purposes.
+        # (We will later override the last user message in the API call with the augmented version.)
+        st.session_state.chat_history.append({"role": "user", "content": user_input})
         st.session_state.pending_response = True

-        #
+        # Retrieve extra context from web search if enabled
         if enable_search:
             retrieved_context = retrieve_context(user_input, max_results=2, max_chars_per_result=150)
         else:
@@ -231,20 +233,26 @@ if user_input:
         st.sidebar.markdown("### Retrieved Context" if enable_search else "Web Search Disabled")
         st.sidebar.text(retrieved_context or "No context found.")

-        # Build an augmented
-        if retrieved_context:
-
-            "
-            f"
+        # Build an augmented user query by merging the system prompt (and search context when available)
+        if enable_search and retrieved_context:
+            augmented_user_input = (
+                f"{system_prompt_base.strip()}\n\n"
+                f"Use the following recent web search context to help answer the query:\n\n"
+                f"{retrieved_context}\n\n"
+                f"User Query: {user_input}"
             )
         else:
-
-
-
-        # Limit conversation history to the last 2 turns
+            augmented_user_input = f"{system_prompt_base.strip()}\n\nUser Query: {user_input}"
+
+        # Limit conversation history to the last MAX_TURNS turns (user/assistant pairs)
         MAX_TURNS = 2
         trimmed_history = st.session_state.chat_history[-(MAX_TURNS * 2):]
-
+
+        # Replace the last user message (which is plain) with the augmented version for model input.
+        if trimmed_history and trimmed_history[-1]["role"] == "user":
+            messages = trimmed_history[:-1] + [{"role": "user", "content": augmented_user_input}]
+        else:
+            messages = trimmed_history + [{"role": "user", "content": augmented_user_input}]

         # Generate response with the LLM in a streaming fashion
         with st.chat_message("assistant"):
@@ -259,7 +267,6 @@ if user_input:
                 repeat_penalty=repeat_penalty,
                 stream=True,
             )
-
            for chunk in stream:
                if "choices" in chunk:
                    delta = chunk["choices"][0]["delta"].get("content", "")
@@ -268,7 +275,8 @@ if user_input:
                    visible_response = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
                    visible_response = re.sub(r"<think>.*$", "", visible_response, flags=re.DOTALL)
                    visible_placeholder.markdown(visible_response)
-
+
+        # Append the assistant's response to conversation history.
         st.session_state.chat_history.append({"role": "assistant", "content": full_response})
         st.session_state.pending_response = False
-        gc.collect() #
+        gc.collect()  # Free memory
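
Why the change fixes the "role disorder" error: many chat templates require strict user/assistant alternation in the message list. The old code appended the plain user message to history and then (judging by the truncated deleted line at old 247) sent the augmented prompt as a further "user" turn, so the model could receive two consecutive "user" messages. The sketch below is a minimal, standalone illustration of the corrected pattern; build_messages is a hypothetical helper name, while MAX_TURNS and the replace-last-user logic mirror the diff.

MAX_TURNS = 2  # keep the last two user/assistant pairs, as in app.py

def build_messages(chat_history, augmented_user_input):
    # Hypothetical helper illustrating the pattern from the diff.
    # Trim history to the last MAX_TURNS user/assistant pairs.
    trimmed = chat_history[-(MAX_TURNS * 2):]
    if trimmed and trimmed[-1]["role"] == "user":
        # Replace the plain last user message rather than appending the
        # augmented one: appending would yield two consecutive "user"
        # entries, which strict-alternation templates reject.
        return trimmed[:-1] + [{"role": "user", "content": augmented_user_input}]
    return trimmed + [{"role": "user", "content": augmented_user_input}]

history = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello!"},
    {"role": "user", "content": "what's new in AI?"},
]
messages = build_messages(history, "SYSTEM PROMPT\n\nUser Query: what's new in AI?")
assert [m["role"] for m in messages] == ["user", "assistant", "user"]

The plain message stays in st.session_state.chat_history for display; only the model-facing list swaps in the augmented version, so role alternation is preserved.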