Spaces:
Running
Running
better management of system prompt
Browse files
app.py
CHANGED
|
@@ -159,7 +159,6 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
|
|
| 159 |
pipe = load_pipeline(model_name)
|
| 160 |
|
| 161 |
# Use the pipeline directly with conversation history.
|
| 162 |
-
# Note: Many chat pipelines use internal chat templating to properly format the conversation.
|
| 163 |
response = pipe(
|
| 164 |
conversation,
|
| 165 |
max_new_tokens=max_tokens,
|
|
@@ -192,6 +191,16 @@ def cancel_generation():
|
|
| 192 |
cancel_event.set()
|
| 193 |
return "Cancellation requested."
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
# ------------------------------
|
| 196 |
# Gradio UI Definition
|
| 197 |
# ------------------------------
|
|
@@ -208,10 +217,12 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 208 |
value=default_model,
|
| 209 |
info="Choose from available models."
|
| 210 |
)
|
| 211 |
-
|
| 212 |
-
|
|
|
|
|
|
|
| 213 |
system_prompt_text = gr.Textbox(label="System Prompt",
|
| 214 |
-
value=
|
| 215 |
lines=3,
|
| 216 |
info="Define the base context for the AI's responses.")
|
| 217 |
gr.Markdown("### Generation Parameters")
|
|
@@ -226,8 +237,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 226 |
repeat_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.1, step=0.1,
|
| 227 |
info="Penalizes token repetition to improve diversity.")
|
| 228 |
gr.Markdown("### Web Search Settings")
|
| 229 |
-
enable_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False,
|
| 230 |
-
info="Include recent search context to improve answers.")
|
| 231 |
max_results_number = gr.Number(label="Max Search Results", value=6, precision=0,
|
| 232 |
info="Maximum number of search results to retrieve.")
|
| 233 |
max_chars_number = gr.Number(label="Max Chars per Result", value=600, precision=0,
|
|
@@ -239,13 +248,20 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 239 |
msg_input = gr.Textbox(label="Your Message", placeholder="Enter your message and press Enter")
|
| 240 |
search_debug = gr.Markdown(label="Web Search Debug")
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
def clear_chat():
|
| 243 |
return [], "", ""
|
| 244 |
|
| 245 |
clear_button.click(fn=clear_chat, outputs=[chatbot, msg_input, search_debug])
|
| 246 |
cancel_button.click(fn=cancel_generation, outputs=search_debug)
|
| 247 |
|
| 248 |
-
# Submission: the chat_response function is
|
| 249 |
msg_input.submit(
|
| 250 |
fn=chat_response,
|
| 251 |
inputs=[msg_input, chatbot, system_prompt_text, enable_search_checkbox,
|
|
|
|
| 159 |
pipe = load_pipeline(model_name)
|
| 160 |
|
| 161 |
# Use the pipeline directly with conversation history.
|
|
|
|
| 162 |
response = pipe(
|
| 163 |
conversation,
|
| 164 |
max_new_tokens=max_tokens,
|
|
|
|
| 191 |
cancel_event.set()
|
| 192 |
return "Cancellation requested."
|
| 193 |
|
| 194 |
+
# ------------------------------
|
| 195 |
+
# Helper Function for Default Prompt Update
|
| 196 |
+
# ------------------------------
|
| 197 |
+
def update_default_prompt(enable_search):
|
| 198 |
+
today = datetime.now().strftime('%Y-%m-%d')
|
| 199 |
+
if enable_search:
|
| 200 |
+
return f"You are a helpful assistant. Today is {today}. Please leverage the latest web data when responding to queries."
|
| 201 |
+
else:
|
| 202 |
+
return f"You are a helpful assistant. Today is {today}."
|
| 203 |
+
|
| 204 |
# ------------------------------
|
| 205 |
# Gradio UI Definition
|
| 206 |
# ------------------------------
|
|
|
|
| 217 |
value=default_model,
|
| 218 |
info="Choose from available models."
|
| 219 |
)
|
| 220 |
+
# Create the Enable Web Search checkbox.
|
| 221 |
+
enable_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False,
|
| 222 |
+
info="Include recent search context to improve answers.")
|
| 223 |
+
# Create the System Prompt textbox with an initial value.
|
| 224 |
system_prompt_text = gr.Textbox(label="System Prompt",
|
| 225 |
+
value=update_default_prompt(enable_search_checkbox.value),
|
| 226 |
lines=3,
|
| 227 |
info="Define the base context for the AI's responses.")
|
| 228 |
gr.Markdown("### Generation Parameters")
|
|
|
|
| 237 |
repeat_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.1, step=0.1,
|
| 238 |
info="Penalizes token repetition to improve diversity.")
|
| 239 |
gr.Markdown("### Web Search Settings")
|
|
|
|
|
|
|
| 240 |
max_results_number = gr.Number(label="Max Search Results", value=6, precision=0,
|
| 241 |
info="Maximum number of search results to retrieve.")
|
| 242 |
max_chars_number = gr.Number(label="Max Chars per Result", value=600, precision=0,
|
|
|
|
| 248 |
msg_input = gr.Textbox(label="Your Message", placeholder="Enter your message and press Enter")
|
| 249 |
search_debug = gr.Markdown(label="Web Search Debug")
|
| 250 |
|
| 251 |
+
# Wire the Enable Web Search checkbox change to update the System Prompt textbox.
|
| 252 |
+
enable_search_checkbox.change(
|
| 253 |
+
fn=update_default_prompt,
|
| 254 |
+
inputs=[enable_search_checkbox],
|
| 255 |
+
outputs=[system_prompt_text]
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
def clear_chat():
|
| 259 |
return [], "", ""
|
| 260 |
|
| 261 |
clear_button.click(fn=clear_chat, outputs=[chatbot, msg_input, search_debug])
|
| 262 |
cancel_button.click(fn=cancel_generation, outputs=search_debug)
|
| 263 |
|
| 264 |
+
# Submission: the chat_response function is used with the Transformers pipeline.
|
| 265 |
msg_input.submit(
|
| 266 |
fn=chat_response,
|
| 267 |
inputs=[msg_input, chatbot, system_prompt_text, enable_search_checkbox,
|