Spaces:
Running
Running
better management of system prompt
Browse files
app.py
CHANGED
|
@@ -159,7 +159,6 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
|
|
| 159 |
pipe = load_pipeline(model_name)
|
| 160 |
|
| 161 |
# Use the pipeline directly with conversation history.
|
| 162 |
-
# Note: Many chat pipelines use internal chat templating to properly format the conversation.
|
| 163 |
response = pipe(
|
| 164 |
conversation,
|
| 165 |
max_new_tokens=max_tokens,
|
|
@@ -192,6 +191,16 @@ def cancel_generation():
|
|
| 192 |
cancel_event.set()
|
| 193 |
return "Cancellation requested."
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
# ------------------------------
|
| 196 |
# Gradio UI Definition
|
| 197 |
# ------------------------------
|
|
@@ -208,10 +217,12 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 208 |
value=default_model,
|
| 209 |
info="Choose from available models."
|
| 210 |
)
|
| 211 |
-
|
| 212 |
-
|
|
|
|
|
|
|
| 213 |
system_prompt_text = gr.Textbox(label="System Prompt",
|
| 214 |
-
value=
|
| 215 |
lines=3,
|
| 216 |
info="Define the base context for the AI's responses.")
|
| 217 |
gr.Markdown("### Generation Parameters")
|
|
@@ -226,8 +237,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 226 |
repeat_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.1, step=0.1,
|
| 227 |
info="Penalizes token repetition to improve diversity.")
|
| 228 |
gr.Markdown("### Web Search Settings")
|
| 229 |
-
enable_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False,
|
| 230 |
-
info="Include recent search context to improve answers.")
|
| 231 |
max_results_number = gr.Number(label="Max Search Results", value=6, precision=0,
|
| 232 |
info="Maximum number of search results to retrieve.")
|
| 233 |
max_chars_number = gr.Number(label="Max Chars per Result", value=600, precision=0,
|
|
@@ -239,13 +248,20 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 239 |
msg_input = gr.Textbox(label="Your Message", placeholder="Enter your message and press Enter")
|
| 240 |
search_debug = gr.Markdown(label="Web Search Debug")
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
def clear_chat():
|
| 243 |
return [], "", ""
|
| 244 |
|
| 245 |
clear_button.click(fn=clear_chat, outputs=[chatbot, msg_input, search_debug])
|
| 246 |
cancel_button.click(fn=cancel_generation, outputs=search_debug)
|
| 247 |
|
| 248 |
-
# Submission: the chat_response function is
|
| 249 |
msg_input.submit(
|
| 250 |
fn=chat_response,
|
| 251 |
inputs=[msg_input, chatbot, system_prompt_text, enable_search_checkbox,
|
|
|
|
| 159 |
pipe = load_pipeline(model_name)
|
| 160 |
|
| 161 |
# Use the pipeline directly with conversation history.
|
|
|
|
| 162 |
response = pipe(
|
| 163 |
conversation,
|
| 164 |
max_new_tokens=max_tokens,
|
|
|
|
| 191 |
cancel_event.set()
|
| 192 |
return "Cancellation requested."
|
| 193 |
|
| 194 |
+
# ------------------------------
|
| 195 |
+
# Helper Function for Default Prompt Update
|
| 196 |
+
# ------------------------------
|
| 197 |
+
def update_default_prompt(enable_search):
|
| 198 |
+
today = datetime.now().strftime('%Y-%m-%d')
|
| 199 |
+
if enable_search:
|
| 200 |
+
return f"You are a helpful assistant. Today is {today}. Please leverage the latest web data when responding to queries."
|
| 201 |
+
else:
|
| 202 |
+
return f"You are a helpful assistant. Today is {today}."
|
| 203 |
+
|
| 204 |
# ------------------------------
|
| 205 |
# Gradio UI Definition
|
| 206 |
# ------------------------------
|
|
|
|
| 217 |
value=default_model,
|
| 218 |
info="Choose from available models."
|
| 219 |
)
|
| 220 |
+
# Create the Enable Web Search checkbox.
|
| 221 |
+
enable_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False,
|
| 222 |
+
info="Include recent search context to improve answers.")
|
| 223 |
+
# Create the System Prompt textbox with an initial value.
|
| 224 |
system_prompt_text = gr.Textbox(label="System Prompt",
|
| 225 |
+
value=update_default_prompt(enable_search_checkbox.value),
|
| 226 |
lines=3,
|
| 227 |
info="Define the base context for the AI's responses.")
|
| 228 |
gr.Markdown("### Generation Parameters")
|
|
|
|
| 237 |
repeat_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.1, step=0.1,
|
| 238 |
info="Penalizes token repetition to improve diversity.")
|
| 239 |
gr.Markdown("### Web Search Settings")
|
|
|
|
|
|
|
| 240 |
max_results_number = gr.Number(label="Max Search Results", value=6, precision=0,
|
| 241 |
info="Maximum number of search results to retrieve.")
|
| 242 |
max_chars_number = gr.Number(label="Max Chars per Result", value=600, precision=0,
|
|
|
|
| 248 |
msg_input = gr.Textbox(label="Your Message", placeholder="Enter your message and press Enter")
|
| 249 |
search_debug = gr.Markdown(label="Web Search Debug")
|
| 250 |
|
| 251 |
+
# Wire the Enable Web Search checkbox change to update the System Prompt textbox.
|
| 252 |
+
enable_search_checkbox.change(
|
| 253 |
+
fn=update_default_prompt,
|
| 254 |
+
inputs=[enable_search_checkbox],
|
| 255 |
+
outputs=[system_prompt_text]
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
def clear_chat():
|
| 259 |
return [], "", ""
|
| 260 |
|
| 261 |
clear_button.click(fn=clear_chat, outputs=[chatbot, msg_input, search_debug])
|
| 262 |
cancel_button.click(fn=cancel_generation, outputs=search_debug)
|
| 263 |
|
| 264 |
+
# Submission: the chat_response function is used with the Transformers pipeline.
|
| 265 |
msg_input.submit(
|
| 266 |
fn=chat_response,
|
| 267 |
inputs=[msg_input, chatbot, system_prompt_text, enable_search_checkbox,
|