Spaces:

TuringsSolutions
/

test-gpt-omni

Sleeping

App Files Files Community

TuringsSolutions committed on Oct 12, 2024

Commit

94cfbe9

verified ·

1 Parent(s): 466a41a

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -23

app.py CHANGED Viewed

@@ -12,9 +12,9 @@ class AppState:
     pause_detected: bool = False
     stopped: bool = False
     started_talking: bool = False
-    conversation: list = field(default_factory=list)  # Proper use of default_factory
-# Function to process audio input and detect pauses
 def process_audio(audio: tuple, state: AppState):
     if state.stream is None:
         state.stream = audio[1]
@@ -22,28 +22,24 @@ def process_audio(audio: tuple, state: AppState):
     else:
         state.stream = np.concatenate((state.stream, audio[1]))
-    # Detect if a pause has occurred (for simplicity, use 1-second threshold)
     pause_detected = len(state.stream) > state.sampling_rate * 1
     state.pause_detected = pause_detected
     if state.pause_detected:
-        return gr.Audio(recording=False), state  # Stop recording
     return None, state
-# Generate chatbot response based on user input (audio or text)
-def response(user_input, state: AppState, input_type: str):
     if input_type == "text":
-        # Handle text input
-        state.conversation.append({"role": "user", "content": user_input})
-        bot_response = f"Echo: {user_input}"  # Simulate response
         state.conversation.append({"role": "assistant", "content": bot_response})
         return bot_response, state
-    # Handle audio input if pause was detected
     if not state.pause_detected:
         return None, state
-    # Convert audio to WAV and store in conversation history
     audio_buffer = io.BytesIO()
     segment = AudioSegment(
         state.stream.tobytes(),
@@ -57,12 +53,9 @@ def response(user_input, state: AppState, input_type: str):
         f.write(audio_buffer.getvalue())
     state.conversation.append({"role": "user", "content": {"path": f.name, "mime_type": "audio/wav"}})
-    # Simulate bot's response (replace with mini omni logic)
-    chatbot_response = b"Simulated response audio content"  # Placeholder
-    output_buffer = chatbot_response
     with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
-        f.write(output_buffer)
     state.conversation.append({"role": "assistant", "content": {"path": f.name, "mime_type": "audio/mp3"}})
     yield None, state
@@ -72,11 +65,11 @@ def start_recording_user(state: AppState):
     if not state.stopped:
         return gr.Audio(recording=True)
-# Gradio interface setup
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
             text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
         with gr.Column():
             chatbot = gr.Chatbot(label="Conversation", type="messages")
@@ -89,19 +82,26 @@ with gr.Blocks() as demo:
         process_audio, [input_audio, state], [input_audio, state], stream_every=0.5, time_limit=30
     )
-    # Handle responses for both text and audio inputs
     text_submit = text_input.submit(
         lambda txt, s: response(txt, s, "text"), [text_input, state], [chatbot, state]
     )
-    respond = input_audio.stop_recording(response, [None, state, "audio"], [output_audio, state])
     respond.then(lambda s: s.conversation, [state], [chatbot])
-    # Restart recording when audio playback stops
     restart = output_audio.stop(start_recording_user, [state], [input_audio])
-    # Stop button to cancel the conversation
     cancel = gr.Button("Stop Conversation", variant="stop")
-    cancel.click(lambda: (AppState(stopped=True), gr.Audio(recording=False)), None, [state, input_audio], cancels=[respond, restart])
 if __name__ == "__main__":
     demo.launch()

     pause_detected: bool = False
     stopped: bool = False
     started_talking: bool = False
+    conversation: list = field(default_factory=list)
+# Process audio input and detect pauses
 def process_audio(audio: tuple, state: AppState):
     if state.stream is None:
         state.stream = audio[1]
     else:
         state.stream = np.concatenate((state.stream, audio[1]))
     pause_detected = len(state.stream) > state.sampling_rate * 1
     state.pause_detected = pause_detected
     if state.pause_detected:
+        return gr.Audio(recording=False), state
     return None, state
+# Generate response based on input type (text or audio)
+def response(input_data, state: AppState, input_type: str):
     if input_type == "text":
+        state.conversation.append({"role": "user", "content": input_data})
+        bot_response = f"Echo: {input_data}"
         state.conversation.append({"role": "assistant", "content": bot_response})
         return bot_response, state
     if not state.pause_detected:
         return None, state
     audio_buffer = io.BytesIO()
     segment = AudioSegment(
         state.stream.tobytes(),
         f.write(audio_buffer.getvalue())
     state.conversation.append({"role": "user", "content": {"path": f.name, "mime_type": "audio/wav"}})
+    chatbot_response = b"Simulated response audio content"
     with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
+        f.write(chatbot_response)
     state.conversation.append({"role": "assistant", "content": {"path": f.name, "mime_type": "audio/mp3"}})
     yield None, state
     if not state.stopped:
         return gr.Audio(recording=True)
+# Gradio app setup
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
+            input_audio = gr.Audio(label="Input Audio", source="microphone", type="numpy")
             text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
         with gr.Column():
             chatbot = gr.Chatbot(label="Conversation", type="messages")
         process_audio, [input_audio, state], [input_audio, state], stream_every=0.5, time_limit=30
     )
+    # Handle text input submission
     text_submit = text_input.submit(
         lambda txt, s: response(txt, s, "text"), [text_input, state], [chatbot, state]
     )
+    # Handle audio stop recording
+    respond = input_audio.stop_recording(
+        lambda s: response(None, s, "audio"), [state], [output_audio, state]
+    )
     respond.then(lambda s: s.conversation, [state], [chatbot])
+    # Restart recording after audio playback ends
     restart = output_audio.stop(start_recording_user, [state], [input_audio])
+    # Stop conversation button
     cancel = gr.Button("Stop Conversation", variant="stop")
+    cancel.click(
+        lambda: (AppState(stopped=True), gr.Audio(recording=False)),
+        None, [state, input_audio], cancels=[respond, restart]
+    )
 if __name__ == "__main__":
     demo.launch()