Luigi committed
Commit fd70375 · 1 Parent(s): 9ac7f36
Files changed (1)
  1. app.py +30 -18
app.py CHANGED
@@ -511,6 +511,9 @@ def chat_response(user_msg, chat_history, system_prompt,
     in_thought = False
     assistant_message_started = False
 
+    # First yield contains the user message
+    yield history, debug
+
     # Stream tokens
     for chunk in streamer:
         # Check for cancellation signal
@@ -554,12 +557,12 @@ def chat_response(user_msg, chat_history, system_prompt,
             continue
 
         # Stream answer
-        if not answer_buf and not assistant_message_started:
+        if not assistant_message_started:
             history.append({'role': 'assistant', 'content': ''})
             assistant_message_started = True
 
         answer_buf += text
-        history[-1]['content'] = answer_buf
+        history[-1]['content'] = answer_buf.strip()
         yield history, debug
 
     gen_thread.join()
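Taken together, these two hunks change the streaming contract: the history (already containing the user's message) is yielded once up front, and the assistant entry is appended lazily on the first answer token, gated on the `assistant_message_started` flag rather than the previous emptiness check on `answer_buf`. A minimal, self-contained sketch of the pattern, with an illustrative `fake_streamer` standing in for the app's real token streamer:

def fake_streamer():
    # Stand-in for the real streamer used by app.py.
    yield from ["Hello", ", ", "world", "!"]

def stream_chat(user_msg, history):
    # History already includes the user's message before any tokens arrive.
    history = (history or []) + [{'role': 'user', 'content': user_msg}]
    yield history  # first yield: the UI can render the user message immediately

    answer_buf = ''
    assistant_message_started = False
    for text in fake_streamer():
        if not assistant_message_started:
            # Append the assistant entry exactly once, on the first token.
            history.append({'role': 'assistant', 'content': ''})
            assistant_message_started = True
        answer_buf += text
        history[-1]['content'] = answer_buf.strip()
        yield history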
@@ -625,27 +628,32 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
 
     # Group all inputs for cleaner event handling
     chat_inputs = [txt, chat, sys_prompt, search_chk, mr, mc, model_dd, max_tok, temp, k, p, rp, st]
-    # Group all UI components that change state
-    interactive_components = [txt, submit_btn, cancel_btn, chat, dbg]
+    # Group all UI components that can be updated.
+    ui_components = [chat, dbg, txt, submit_btn, cancel_btn]
 
     def submit_and_manage_ui(user_msg, chat_history, *args):
         """
-        An orchestrator function that manages the UI state and calls the backend chat function.
+        Orchestrator function that manages UI state and calls the backend chat function.
         It uses a try...finally block to ensure the UI is always reset.
         """
-        # Immediately update UI to a "generating" state
+        if not user_msg.strip():
+            # If the message is empty, do nothing.
+            # We yield an empty dict to avoid any state changes.
+            yield {}
+            return
+
+        # 1. Update UI to "generating" state.
+        # Crucially, we do NOT update the `chat` component here, as the backend
+        # will provide the correctly formatted history in the first response chunk.
         yield {
-            # Add the user's message to the chat and a placeholder for the response
-            chat: chat_history + [[user_msg, None]],
             txt: gr.update(value="", interactive=False),
             submit_btn: gr.update(interactive=False),
             cancel_btn: gr.update(visible=True),
         }
 
         try:
-            # Package the arguments for the backend function
+            # 2. Call the backend and stream updates
            backend_args = [user_msg, chat_history] + list(args)
-            # Stream the response from the backend
            for response_chunk in chat_response(*backend_args):
                yield {
                    chat: response_chunk[0],
@@ -653,8 +661,14 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
                 }
         except Exception as e:
             print(f"An error occurred during generation: {e}")
+            # If an error happens, add it to the chat history to inform the user.
+            error_history = (chat_history or []) + [
+                {'role': 'user', 'content': user_msg},
+                {'role': 'assistant', 'content': f"**An error occurred:** {str(e)}"}
+            ]
+            yield {chat: error_history}
         finally:
-            # Always reset the UI to an "idle" state, regardless of completion or cancellation
+            # 3. ALWAYS reset the UI to an "idle" state upon completion, error, or cancellation.
             print("Resetting UI state.")
             yield {
                 txt: gr.update(interactive=True),
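The orchestrator leans on a Gradio behavior worth noting: when every component that might change is listed in `outputs`, a generator can yield a dict keyed by component, and only the components named in that dict are updated. A runnable toy version of the pattern (component names and the echo loop are illustrative, not the app's):

import time
import gradio as gr

with gr.Blocks() as demo:
    txt = gr.Textbox(label="Message")
    btn = gr.Button("Send")
    chat = gr.Chatbot(type="messages")  # openai-style role/content dicts, as in app.py

    def reply(user_msg, history):
        # First update touches only the textbox; `chat` is left alone.
        yield {txt: gr.update(value="", interactive=False)}
        history = (history or []) + [
            {'role': 'user', 'content': user_msg},
            {'role': 'assistant', 'content': ''},
        ]
        for ch in "echo: " + user_msg:
            history[-1]['content'] += ch
            time.sleep(0.02)
            yield {chat: history}  # streaming updates touch only the chatbot
        # Final update unlocks the textbox again.
        yield {txt: gr.update(interactive=True)}

    btn.click(reply, inputs=[txt, chat], outputs=[txt, chat])

demo.launch()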
@@ -667,22 +681,20 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
         cancel_event.set()
         print("Cancellation signal sent.")
 
-    # Event for submitting text via Enter key
+    # Event for submitting text via Enter key or Submit button
     submit_event = txt.submit(
         fn=submit_and_manage_ui,
         inputs=chat_inputs,
-        outputs=interactive_components,
+        outputs=ui_components,
     )
-
-    # Event for submitting text via the "Submit" button
     submit_btn.click(
         fn=submit_and_manage_ui,
         inputs=chat_inputs,
-        outputs=interactive_components,
+        outputs=ui_components,
     )
 
-    # Event for the "Cancel" button. It calls the flag-setting function
-    # and, crucially, cancels the long-running submit_event.
+    # Event for the "Cancel" button.
+    # It calls the flag-setting function and cancels the long-running submit_event.
     cancel_btn.click(
         fn=set_cancel_flag,
         cancels=[submit_event]
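The cancel wiring combines two mechanisms: `cancels=[submit_event]` tells Gradio to stop the in-flight generator event, while the `threading.Event` flag lets the background generation thread stop cooperatively. A hedged, self-contained sketch of the same wiring (the `generate` loop is a stand-in for the app's model call, and the per-request `clear()` is an assumption about where app.py resets the flag):

import threading
import time
import gradio as gr

cancel_event = threading.Event()

def set_cancel_flag():
    cancel_event.set()
    print("Cancellation signal sent.")

def generate(prompt):
    cancel_event.clear()  # assumed: app.py resets the flag somewhere per request
    out = ""
    for tok in ("token " * 100).split():
        if cancel_event.is_set():  # cooperative stop, as in the streaming loop
            break
        out += tok + " "
        time.sleep(0.05)
        yield out

with gr.Blocks() as demo:
    txt = gr.Textbox(label="Prompt")
    box = gr.Textbox(label="Output")
    cancel_btn = gr.Button("Cancel")
    submit_event = txt.submit(generate, inputs=txt, outputs=box)
    # `cancels=[submit_event]` halts the event; the flag reaches worker threads.
    cancel_btn.click(fn=set_cancel_flag, cancels=[submit_event])

demo.launch()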
 