Spaces:

fishaudio
/

fish-agent

Running on L40S

SpicyqSama007 committed on Nov 7, 2024

Commit

0d147db

verified ·

1 Parent(s): 5bea296

Update app.py (#5)

- Update app.py (4b5d9b5edba474b3f489ab40ea4f03b6d36f73d7)

Co-authored-by: anya <SpicyqSama007@users.noreply.huggingface.co>

Files changed (1) hide show

app.py CHANGED Viewed

@@ -117,16 +117,21 @@ async def process_audio_input(
     ):
         if event.type == FishE2EEventType.USER_CODES:
             append_to_chat_ctx(ServeVQPart(codes=event.vq_codes), role="user")
         elif event.type == FishE2EEventType.SPEECH_SEGMENT:
             append_to_chat_ctx(ServeVQPart(codes=event.vq_codes))
-            yield state.get_history(), wav_chunk_header() + event.frame.data, None, None
         elif event.type == FishE2EEventType.TEXT_SEGMENT:
             append_to_chat_ctx(ServeTextPart(text=event.text))
-            yield state.get_history(), None, None, None
-    yield state.get_history(), None, None, None
 async def process_text_input(
@@ -186,9 +191,7 @@ def create_demo():
                 output_audio = gr.Audio(
                     label="Assistant's Voice",
-                    streaming=True,
-                    autoplay=True,
-                    interactive=False,
                 )
                 send_button = gr.Button("Send", variant="primary")

     ):
         if event.type == FishE2EEventType.USER_CODES:
             append_to_chat_ctx(ServeVQPart(codes=event.vq_codes), role="user")
         elif event.type == FishE2EEventType.SPEECH_SEGMENT:
+            result_audio += event.frame.data
+            np_audio = np.frombuffer(result_audio, dtype=np.int16)
             append_to_chat_ctx(ServeVQPart(codes=event.vq_codes))
+            yield state.get_history(), (44100, np_audio), None, None
         elif event.type == FishE2EEventType.TEXT_SEGMENT:
             append_to_chat_ctx(ServeTextPart(text=event.text))
+            if result_audio:
+                np_audio = np.frombuffer(result_audio, dtype=np.int16)
+                yield state.get_history(), (44100, np_audio), None, None
+            else:
+                yield state.get_history(), None, None, None
+    np_audio = np.frombuffer(result_audio, dtype=np.int16)
+    yield state.get_history(), (44100, np_audio), None, None
 async def process_text_input(
                 output_audio = gr.Audio(
                     label="Assistant's Voice",
+                    type="numpy",
                 )
                 send_button = gr.Button("Send", variant="primary")