yanyihan-xiaomi committed on
Commit 178417b · 1 Parent(s): 966a9a2

Add Dockerfile and implement WebRTC functionality


- Created Dockerfile for environment setup.
- Added WebRTC handling in mimo_webrtc.py.
- Updated requirements.txt for new dependencies.
- Enhanced .gitignore for better file management.

Files changed (6)
  1. .gitignore +4 -2
  2. Dockerfile +26 -0
  3. README.md +4 -7
  4. app.py → mimo_webrtc.py +32 -102
  5. requirements.txt +4 -3
  6. webrtc_vad.py +192 -0
.gitignore CHANGED
@@ -1,2 +1,4 @@
- **/__pycache__/**
- **/tmp/**
+ __pycache__
+ tmp
+ .venv
+ .vscode
Dockerfile ADDED
@@ -0,0 +1,26 @@
+ FROM python:3.12-slim
+
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ RUN apt-get update && apt-get install -y \
+     ca-certificates \
+     curl \
+     libc++1 \
+     ffmpeg \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN useradd -m -u 1000 user
+ ENV HOME=/home/user
+ ENV PATH="$HOME/.local/bin:$PATH"
+ USER user
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN python3 -m pip install --no-cache-dir --upgrade pip \
+     && python3 -m pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+
+ CMD ["python3.12", "-u", "mimo_webrtc.py"]
README.md CHANGED
@@ -1,13 +1,10 @@
  ---
  title: MiMo-Audio-Chat
- emoji: 🚀
+ emoji: 💬
  colorFrom: yellow
  colorTo: indigo
- sdk: gradio
- sdk_version: 5.44.1
- app_file: app.py
- pinned: false
- python_version: 3.12.7
+ sdk: docker
+ app_port: 8087
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
app.py → mimo_webrtc.py RENAMED
@@ -3,11 +3,12 @@ import queue
  import random
  import time
  from threading import Thread
- from typing import Any, Callable, Literal, override
+ from typing import Any, Literal, override

  import fastrtc
  import gradio as gr
  import httpx
+ import librosa
  import numpy as np

  from api_schema import (
@@ -22,6 +23,7 @@ from api_schema import (
      TokenizedConversation,
      TokenizedMessage,
  )
+ from webrtc_vad import VADStreamHandler

  HF_TOKEN = os.getenv("HF_TOKEN")
  SERVER_LIST = os.getenv("SERVER_LIST")
@@ -66,7 +68,7 @@ def auth_headers() -> dict[str, str]:


  def get_cloudflare_turn_credentials(
-     ttl: int = 1200,  # 20 minutes
+     ttl: int = 3600,  # 1 hour
  ) -> dict[str, Any]:
      with httpx.Client() as client:
          response = client.post(
@@ -85,74 +87,6 @@ def get_cloudflare_turn_credentials(
      )


- class NeverVAD(fastrtc.PauseDetectionModel):
-     def vad(self, *_args, **_kwargs):
-         raise RuntimeError("NeverVAD should not be called.")
-
-     def warmup(self):
-         pass
-
-
- class ReplyOnMuted(fastrtc.ReplyOnPause):
-     def __init__(
-         self,
-         fn: fastrtc.reply_on_pause.ReplyFnGenerator,
-         startup_fn: Callable | None = None,
-         can_interrupt: bool = True,
-         needs_args: bool = False,
-     ):
-         super().__init__(
-             fn,
-             startup_fn,
-             None,
-             None,
-             can_interrupt,
-             "mono",
-             24000,
-             None,
-             24000,
-             NeverVAD(),
-             needs_args,
-         )
-
-     def copy(self):
-         return ReplyOnMuted(
-             self.fn,
-             self.startup_fn,
-             self.can_interrupt,
-             self.needs_args,
-         )
-
-     def determine_pause(
-         self,
-         audio: np.ndarray,  # shape [samples,]
-         sampling_rate: int,
-         state: fastrtc.reply_on_pause.AppState,
-     ):
-         chunk_length = len(audio) / sampling_rate
-         if chunk_length > 0.1:
-             state.buffer = None
-         if not state.started_talking:
-             if not np.all(abs(audio) < 5):
-                 state.started_talking = True
-                 self.send_message_sync(
-                     fastrtc.utils.create_message("log", "started_talking")
-                 )
-         if state.started_talking:
-             if state.stream is None:
-                 state.stream = audio
-             else:
-                 state.stream = np.concatenate((state.stream, audio))
-
-             current_duration = len(state.stream) / sampling_rate
-             if current_duration > 1.0:
-                 last_segment = state.stream[-int(sampling_rate * 0.1) :]
-                 if np.all(abs(last_segment) < 5):
-                     return True
-
-         return False
-
-
  class ConversationManager:
      def __init__(self, assistant_style: AssistantStyle | None = None):
          self.conversation = TokenizedConversation(messages=[])
@@ -171,7 +105,15 @@ class ConversationManager:

      def append_audio_chunk(self, audio_chunk: tuple[int, np.ndarray]):
          sr, audio_data = audio_chunk
-         assert sr == 24000, "Only 24kHz audio is supported"
+         target_sr = 24000
+         if sr != target_sr:
+             audio_data = librosa.resample(
+                 audio_data.astype(np.float32) / 32768.0,
+                 orig_sr=sr,
+                 target_sr=target_sr,
+             )
+             audio_data = (audio_data * 32767.0).astype(np.int16)
+             sr = target_sr
          if audio_data.ndim > 1:
              # [channels, samples] -> [samples,]
              # Not Gradio style
@@ -185,7 +127,6 @@

      def chat(
          self,
-         url: httpx.URL,
          chat_id: int,
          input_audio: tuple[int, np.ndarray],
          global_sampler_config: SamplerConfig | None = None,
@@ -195,6 +136,7 @@
          chat_queue = queue.Queue[ChatResponseItem | None]()

          def chat_task():
+             url = chat_server_url("/audio-chat")
              req = ChatRequestBody(
                  conversation=self.conversation,
                  input_audio=ChatAudioBytes.from_audio(input_audio),
@@ -204,15 +146,11 @@
              )
              first_output = True
              with httpx.Client() as client:
-                 headers = {
-                     "Content-Type": "application/json",
-                     "Authorization": f"Bearer {HF_TOKEN}",  # <-- add this line
-                 }
                  with client.stream(
                      method="POST",
                      url=url,
                      content=req.model_dump_json(),
-                     headers=headers,
+                     headers={"Content-Type": "application/json", **auth_headers()},
                  ) as response:
                      if response.status_code != 200:
                          raise RuntimeError(f"Error {response.status_code}")
@@ -270,19 +208,6 @@
          yield None


- def get_microphone_svg(muted: bool | None = None):
-     muted_svg = '<line x1="1" y1="1" x2="23" y2="23"></line>' if muted else ""
-     return f"""
-     <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-mic" style="display: inline; vertical-align: middle;">
-     <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
-     <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
-     <line x1="12" y1="19" x2="12" y2="23"></line>
-     <line x1="8" y1="23" x2="16" y2="23"></line>
-     {muted_svg}
-     </svg>
-     """
-
-
  class ConversationAbortController(AbortController):
      manager: ConversationManager
      cur_turn: int | None
@@ -309,6 +234,20 @@
      return chat_id


+ def parse_gradio_audio(gradio_audio: tuple[int, np.ndarray]):
+     sr, audio = gradio_audio
+
+     if len(audio.shape) > 1:
+         # [samples, channels] -> [channels, samples]
+         audio = audio.T
+
+     if audio.dtype == np.int32:
+         audio = audio.astype(np.float32) / 2**31
+
+     # [samples] or [channels, samples]
+     return sr, audio
+
+
  def main():
      print("Starting WebRTC server")

@@ -401,9 +340,7 @@
          yield additional_outputs()

          try:
-             url = chat_server_url("/audio-chat")
              for chunk in manager.chat(
-                 url,
                  chat_id,
                  input_audio,
              ):
@@ -453,13 +390,6 @@
          title_markdown = gr.Markdown(f"# {title}")
          with gr.Row():
              with gr.Column():
-                 with gr.Accordion("Usage"):
-                     gr.HTML(
-                         f"<li>Note: FastRTC's built-in VAD is quite sensitive. For better stability across environments, this demo uses a manual end-of-speech flow. It simply detects if the microphone is muted. That may lead to a bad experience when using auto-denoise microphone. We are trying to find a stable VAD model that works well with FastRTC.</li>"
-                         f"<li>Click Request Microphone to grant permission, click Record to start a turn, and click Stop to end the turn and clear the conversation history.</li>"
-                         f"<li>After you finish speaking, click the microphone icon {get_microphone_svg()} to end your input and wait for MiMo's reply.</li>"
-                         f"<li>While MiMo is speaking, you can interrupt by clicking the muted microphone icon {get_microphone_svg(muted=True)} and then speaking a new instruction.</li>"
-                     )
                  chat = fastrtc.WebRTC(
                      label="WebRTC Chat",
                      modality="audio",
@@ -484,7 +414,7 @@
                  "- `Preset Prompt` controls the response style.\n"
                  "- `Preset Voice` controls the speaking tone.\n"
                  "- `Custom Prompt` lets you define the response style in natural language (overrides `Preset Prompt`).\n"
-                 "- For best results, choose prompts and voices that match your language.\n"
+                 "- For best results, choose prompts and voices that **match your language**. The default settings are optimized for **English**.\n"
                  "- To apply new settings, end the current conversation and start a new one."
              )
              preset_character_dropdown = gr.Dropdown(
@@ -503,7 +433,7 @@
              )

              chat.stream(
-                 ReplyOnMuted(response),
+                 VADStreamHandler(response),
                  inputs=[
                      chat,
                      preset_character_dropdown,
@@ -526,7 +456,7 @@
          outputs=[title_markdown, preset_character_dropdown, preset_voice_dropdown],
      )

-     demo.launch(show_api=False)
+     demo.launch(server_name="0.0.0.0", server_port=8087, show_api=False)


  if __name__ == "__main__":
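
For reference, the resampling path added to `append_audio_chunk` converts int16 PCM to float32 in [-1, 1), resamples with librosa, and converts back to int16. Below is a minimal standalone sketch of that round trip; the 48 kHz input rate, the synthetic tone, and the variable names are illustrative assumptions, not taken from the diff:

```python
import librosa
import numpy as np

# Illustrative input: one second of 48 kHz int16 mono audio (a 440 Hz tone).
src_sr, target_sr = 48000, 24000
tone = np.sin(2 * np.pi * 440 * np.arange(src_sr) / src_sr)
audio_int16 = (tone * 10000).astype(np.int16)

# Same round trip as append_audio_chunk: int16 -> float32 in [-1, 1) -> resample -> int16.
audio_float = audio_int16.astype(np.float32) / 32768.0
resampled = librosa.resample(audio_float, orig_sr=src_sr, target_sr=target_sr)
audio_24k = (resampled * 32767.0).astype(np.int16)

print(audio_24k.shape)  # roughly (24000,) samples after downsampling to 24 kHz
```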
requirements.txt CHANGED
@@ -1,5 +1,6 @@
- fastapi==0.116.1
- pydantic==2.11.7
  fastrtc==0.0.33
- gradio==5.44.1
+ gradio==5.35.0
  httpx==0.28.1
+ numpy==2.2.6
+ pydantic==2.11.7
+ ten-vad @ git+https://github.com/TEN-framework/ten-vad.git
webrtc_vad.py ADDED
@@ -0,0 +1,192 @@
+ from dataclasses import dataclass
+ from typing import Callable, Generator, override
+
+ import fastrtc
+ import librosa
+ import numpy as np
+ from ten_vad import TenVad
+
+
+ @dataclass
+ class VADEvent:
+     interrupt_signal: bool | None = None
+     full_audio: tuple[int, np.ndarray] | None = None
+
+
+ class RealtimeVAD:
+     def __init__(
+         self,
+         src_sr: int = 24000,
+         hop_size: int = 256,
+         start_threshold: float = 0.8,
+         end_threshold: float = 0.7,
+         pad_start_s: float = 0.6,
+         min_positive_s: float = 0.4,
+         min_silence_s: float = 1.2,
+     ):
+         self.src_sr = src_sr
+         self.vad_sr = 16000
+         self.hop_size = hop_size
+         self.start_threshold = start_threshold
+         self.end_threshold = end_threshold
+         self.pad_start_s = pad_start_s
+         self.min_positive_s = min_positive_s
+         self.min_silence_s = min_silence_s
+
+         self.vad_model = TenVad(hop_size=hop_size)
+
+         self.vad_buffer = np.array([], dtype=np.int16)
+         """
+         VAD Buffer to store audio data for VAD processing
+         Stores 16kHz int16 PCM. Process and cut for each `hop_size` samples.
+         """
+         self.src_buffer = np.array([], dtype=np.int16)
+         """
+         Source Buffer to store original audio data
+         Stores original sampling rate (24kHz) int16 PCM.
+         Cut when pause detected (after `min_silence_s`).
+         Sliding window `pad_start_s` when inactive.
+         """
+
+         self.vad_buffer_offset = 0
+         self.src_buffer_offset = 0
+
+         self.active = False
+         self.interrupt_signal = False
+         self.sum_positive_s = 0.0
+         self.silence_start_s: float | None = None
+
+     def process(self, audio_data: np.ndarray):
+         if audio_data.ndim == 2:
+             # FastRTC style [channels, samples]
+             audio_data = audio_data[0]
+
+         # Append to buffers
+         self.src_buffer = np.concatenate((self.src_buffer, audio_data))
+
+         vad_audio_data = librosa.resample(
+             audio_data.astype(np.float32) / 32768.0,
+             orig_sr=self.src_sr,
+             target_sr=self.vad_sr,
+         )
+         vad_audio_data = (vad_audio_data * 32767.0).round().astype(np.int16)
+         self.vad_buffer = np.concatenate((self.vad_buffer, vad_audio_data))
+         vad_buffer_size = self.vad_buffer.shape[0]
+
+         def process_chunk(chunk_offset_s: float, vad_chunk: np.ndarray):
+             speech_prob, _ = self.vad_model.process(vad_chunk)
+
+             hop_s = self.hop_size / self.vad_sr
+
+             if not self.active:
+                 if speech_prob >= self.start_threshold:
+                     self.active = True
+                     self.sum_positive_s = hop_s
+                     print(f"[VAD] Active at {chunk_offset_s:.2f}s, {speech_prob=:.3f}")
+                 else:
+                     new_src_offset = int(
+                         (chunk_offset_s - self.pad_start_s) * self.src_sr
+                     )
+                     cut_pos = new_src_offset - self.src_buffer_offset
+                     if cut_pos > 0:
+                         self.src_buffer = self.src_buffer[cut_pos:]
+                         self.src_buffer_offset = new_src_offset
+                 return
+
+             chunk_src_pos = int(chunk_offset_s * self.src_sr)
+
+             if speech_prob >= self.end_threshold:
+                 self.silence_start_s = None
+                 self.sum_positive_s += hop_s
+                 if (
+                     not self.interrupt_signal
+                     and self.sum_positive_s >= self.min_positive_s
+                 ):
+                     self.interrupt_signal = True
+                     yield VADEvent(interrupt_signal=True)
+                     print(
+                         f"[VAD] Interrupt signal at {chunk_offset_s:.2f}s, {speech_prob=:.3f}"
+                     )
+             elif self.silence_start_s is None:
+                 self.silence_start_s = chunk_offset_s
+
+             if (
+                 self.silence_start_s is not None
+                 and chunk_offset_s - self.silence_start_s >= self.min_silence_s
+             ):
+                 # Inactive now
+                 cut_pos = chunk_src_pos - self.src_buffer_offset
+                 if self.interrupt_signal:
+                     webrtc_audio = self.src_buffer[np.newaxis, :cut_pos]
+                     yield VADEvent(full_audio=(self.src_sr, webrtc_audio))
+                     print(
+                         f"[VAD] Full audio at {chunk_offset_s:.2f}s, {webrtc_audio.shape=}"
+                     )
+                 self.src_buffer = self.src_buffer[cut_pos:]
+                 self.src_buffer_offset = chunk_src_pos
+
+                 self.active = False
+                 self.interrupt_signal = False
+                 self.sum_positive_s = 0.0
+                 self.silence_start_s = None
+
+         for chunk_pos in range(0, vad_buffer_size - self.hop_size, self.hop_size):
+             processed_samples = chunk_pos + self.hop_size
+             chunk_offset_s = (self.vad_buffer_offset + chunk_pos) / self.vad_sr
+             vad_chunk = self.vad_buffer[chunk_pos : chunk_pos + self.hop_size]
+             yield from process_chunk(chunk_offset_s, vad_chunk)
+
+         self.vad_buffer = self.vad_buffer[processed_samples:]
+         self.vad_buffer_offset += processed_samples
+
+
+ type StreamerGenerator = Generator[fastrtc.tracks.EmitType, None, None]
+ type StreamerFn = Callable[[tuple[int, np.ndarray], str], StreamerGenerator]
+
+
+ class VADStreamHandler(fastrtc.StreamHandler):
+     def __init__(
+         self,
+         streamer_fn: StreamerFn,
+         input_sample_rate: int = 24000,
+     ):
+         super().__init__(
+             "mono",
+             24000,
+             None,
+             input_sample_rate,
+             30,
+         )
+         self.streamer_fn = streamer_fn
+         self.realtime_vad = RealtimeVAD(src_sr=input_sample_rate)
+         self.generator: StreamerGenerator | None = None
+
+     @override
+     def emit(self) -> fastrtc.tracks.EmitType:
+         if self.generator is None:
+             return None
+
+         try:
+             return next(self.generator)
+         except StopIteration:
+             self.generator = None
+             return None
+
+     @override
+     def receive(self, frame: tuple[int, np.ndarray]):
+         _, audio_data = frame
+         for event in self.realtime_vad.process(audio_data):
+             if event.interrupt_signal:
+                 self.generator = None
+                 self.clear_queue()
+             if event.full_audio is not None:
+                 self.wait_for_args_sync()
+                 self.latest_args[0] = event.full_audio
+                 self.generator = self.streamer_fn(*self.latest_args)
+
+     @override
+     def copy(self):
+         return VADStreamHandler(
+             self.streamer_fn,
+             input_sample_rate=self.input_sample_rate,
+         )
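
As a rough usage sketch of the new VAD path: `RealtimeVAD.process` consumes 24 kHz int16 frames and yields `VADEvent`s, first `interrupt_signal` once roughly `min_positive_s` of speech has accumulated, then `full_audio` after `min_silence_s` of silence. The 20 ms frame size, the synthetic audio, and the prints below are illustrative assumptions, not part of the commit; whether the synthetic burst actually trips ten-vad depends on the model, so treat this purely as a shape-and-flow illustration.

```python
# Rough usage sketch (not from the commit): drive RealtimeVAD with 20 ms frames
# of 24 kHz int16 audio and react to the events it yields. Real microphone
# frames arrive via FastRTC's VADStreamHandler.receive instead.
import numpy as np
from webrtc_vad import RealtimeVAD

vad = RealtimeVAD(src_sr=24000)
frame_size = 480  # 20 ms at 24 kHz (assumed frame size)

# Three seconds of silence with a louder one-second burst in the middle.
audio = np.zeros(3 * 24000, dtype=np.int16)
burst = np.clip(np.random.randn(24000) * 8000, -32768, 32767)
audio[24000:48000] = burst.astype(np.int16)

for start in range(0, len(audio), frame_size):
    frame = audio[start : start + frame_size]
    for event in vad.process(frame):
        if event.interrupt_signal:
            # VADStreamHandler uses this to drop the current response generator
            # and clear the output queue (barge-in).
            print("interrupt: speech started")
        if event.full_audio is not None:
            sr, segment = event.full_audio  # (24000, [1, samples]) int16
            print(f"utterance captured: {segment.shape[1] / sr:.2f}s")
```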