Spaces:

thecollabagepatch
/

magenta-retry

Running

App Files Files Community

thecollabagepatch commited on 18 days ago

Commit

c4a08cc

1 Parent(s): 9f21d72

buffer queue customization added to websockets route

Browse files

Files changed (2) hide show

app.py +117 -10
magentaRT_rt_tester.html +49 -2

app.py CHANGED Viewed

@@ -1647,16 +1647,37 @@ async def ws_jam(websocket: WebSocket):
                         # kick off the ~2s streaming loop
                         async def _rt_loop():
                             try:
                                 mrt = websocket._mrt
                                 chunk_secs = (mrt.config.chunk_length_frames * mrt.config.frame_length_samples) / float(mrt.sample_rate)
                                 target_next = time.perf_counter()
                                 while websocket._rt_running:
                                     mrt.guidance_weight = websocket._rt_guid
                                     mrt.temperature     = websocket._rt_temp
                                     mrt.topk            = websocket._rt_topk
-                                    # ramp style
                                     ramp = float(getattr(websocket, "_style_ramp_s", 0.0) or 0.0)
                                     if ramp <= 0.0:
                                         websocket._style_cur = websocket._style_tgt
@@ -1664,38 +1685,100 @@ async def ws_jam(websocket: WebSocket):
                                         step = min(1.0, chunk_secs / ramp)
                                         websocket._style_cur = websocket._style_cur + step * (websocket._style_tgt - websocket._style_cur)
                                     wav, new_state = mrt.generate_chunk(state=websocket._state, style=websocket._style_cur)
                                     websocket._state = new_state
                                     x = wav.samples.astype(np.float32, copy=False)
                                     buf = io.BytesIO()
                                     sf.write(buf, x, mrt.sample_rate, subtype="FLOAT", format="WAV")
                                     ok = True
                                     if binary_audio:
                                         try:
                                             await websocket.send_bytes(buf.getvalue())
-                                            ok = await send_json({"type": "chunk_meta", "metadata": {"sample_rate": mrt.sample_rate}})
                                         except Exception:
                                             ok = False
                                     else:
                                         b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
-                                        ok = await send_json({"type": "chunk", "audio_base64": b64,
-                                                            "metadata": {"sample_rate": mrt.sample_rate}})
                                     if not ok:
                                         break
                                     if getattr(websocket, "_pace", "asap") == "realtime":
-                                        t1 = time.perf_counter()
                                         target_next += chunk_secs
-                                        sleep_s = max(0.0, target_next - t1 - 0.03)
                                         if sleep_s > 0:
                                             await asyncio.sleep(sleep_s)
                             except asyncio.CancelledError:
                                 pass
-                            except Exception:
-                                pass
                         websocket._rt_task = asyncio.create_task(_rt_loop())
                         continue  # skip the “bar-mode started” message below
@@ -1737,11 +1820,18 @@ async def ws_jam(websocket: WebSocket):
                     )
                     await send_json({"type":"status", **res})  # {"ok": True}
                 else:
-                    # rt-mode: there’s no JamWorker; update the local knobs/state
                     websocket._rt_temp = float(msg.get("temperature", websocket._rt_temp))
                     websocket._rt_topk = int(msg.get("topk", websocket._rt_topk))
                     websocket._rt_guid = float(msg.get("guidance_weight", websocket._rt_guid))
                     # NEW steering fields
                     if "mean" in msg and msg["mean"] is not None:
                         try: websocket._rt_mean = float(msg["mean"])
@@ -1761,6 +1851,7 @@ async def ws_jam(websocket: WebSocket):
                     text_list = [s for s in (styles_str.split(",") if styles_str else []) if s.strip()]
                     text_w = [float(x) for x in style_weights_str.split(",")] if style_weights_str else []
                     asset_manager.ensure_assets_loaded(get_mrt())
                     websocket._style_tgt = build_style_vector(
                         websocket._mrt,
@@ -1771,12 +1862,28 @@ async def ws_jam(websocket: WebSocket):
                         mean_weight=float(websocket._rt_mean),
                         centroid_weights=websocket._rt_centroid_weights,
                     )
                     # optionally allow live changes to ramp:
                     if "style_ramp_seconds" in msg:
                         try: websocket._style_ramp_s = float(msg["style_ramp_seconds"])
                         except: pass
                     await send_json({"type":"status","updated":"rt-knobs+style"})
             elif mtype == "consume" and mode == "bar":
                 with jam_lock:
                     worker = jam_registry.get(msg.get("session_id"))

                         # kick off the ~2s streaming loop
                         async def _rt_loop():
+                            """
+                            Enhanced realtime generation loop with adaptive pacing.
+                            Prevents buffer underruns while keeping style updates responsive.
+                            """
                             try:
                                 mrt = websocket._mrt
                                 chunk_secs = (mrt.config.chunk_length_frames * mrt.config.frame_length_samples) / float(mrt.sample_rate)
                                 target_next = time.perf_counter()
+                                # ADAPTIVE PACING STATE
+                                # These thresholds define when to speed up or slow down
+                                BUFFER_LOW_THRESHOLD = 5.0      # Speed up if buffer < 3s
+                                BUFFER_TARGET = 5.0             # Target buffer level
+                                BUFFER_HIGH_THRESHOLD = 8.0     # Slow down if buffer > 7s
+                                BURST_CHUNKS = 3                # Number of chunks to burst ahead after updates
+                                # Pacing lookahead values (how far ahead to stay)
+                                LOOKAHEAD_BURST = 0.0           # No sleep during burst (go as fast as possible)
+                                LOOKAHEAD_NORMAL = 0.02         # Normal realtime (your current value)
+                                LOOKAHEAD_SLOW = 0.10           # When buffer is high, can afford more latency
+                                burst_countdown = 2             # Chunks remaining in burst mode
+                                last_buffer_level = BUFFER_TARGET  # Start assuming target
                                 while websocket._rt_running:
+                                    # Update model parameters (these are fast, just attribute assignments)
                                     mrt.guidance_weight = websocket._rt_guid
                                     mrt.temperature     = websocket._rt_temp
                                     mrt.topk            = websocket._rt_topk
+                                    # Ramp style vector (already implemented, keep as-is)
                                     ramp = float(getattr(websocket, "_style_ramp_s", 0.0) or 0.0)
                                     if ramp <= 0.0:
                                         websocket._style_cur = websocket._style_tgt
                                         step = min(1.0, chunk_secs / ramp)
                                         websocket._style_cur = websocket._style_cur + step * (websocket._style_tgt - websocket._style_cur)
+                                    # GENERATE CHUNK (this is the heavy operation)
+                                    t_gen_start = time.perf_counter()
                                     wav, new_state = mrt.generate_chunk(state=websocket._state, style=websocket._style_cur)
                                     websocket._state = new_state
+                                    t_gen_end = time.perf_counter()
+                                    # Encode audio
                                     x = wav.samples.astype(np.float32, copy=False)
                                     buf = io.BytesIO()
                                     sf.write(buf, x, mrt.sample_rate, subtype="FLOAT", format="WAV")
+                                    # Send to client
                                     ok = True
+                                    t_send_start = time.perf_counter()
                                     if binary_audio:
                                         try:
                                             await websocket.send_bytes(buf.getvalue())
+                                            ok = await send_json({
+                                                "type": "chunk_meta",
+                                                "metadata": {
+                                                    "sample_rate": mrt.sample_rate,
+                                                    "generation_time_ms": int((t_gen_end - t_gen_start) * 1000),
+                                                }
+                                            })
                                         except Exception:
                                             ok = False
                                     else:
                                         b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+                                        ok = await send_json({
+                                            "type": "chunk",
+                                            "audio_base64": b64,
+                                            "metadata": {
+                                                "sample_rate": mrt.sample_rate,
+                                                "generation_time_ms": int((t_gen_end - t_gen_start) * 1000),
+                                            }
+                                        })
+                                    t_send_end = time.perf_counter()
                                     if not ok:
                                         break
+                                    # ADAPTIVE PACING LOGIC
+                                    # Read buffer level from websocket attribute (updated by frontend in update messages)
+                                    current_buffer = getattr(websocket, "_frontend_buffer_seconds", last_buffer_level)
+                                    last_buffer_level = current_buffer
+                                    # Check if we received an update signal (set by the update message handler)
+                                    if getattr(websocket, "_rt_update_received", False):
+                                        # Burst ahead for a few chunks to rebuild buffer after style change
+                                        burst_countdown = BURST_CHUNKS
+                                        websocket._rt_update_received = False
+                                    # Determine pacing mode based on buffer level and burst state
+                                    if burst_countdown > 0:
+                                        # BURST MODE: Go as fast as possible
+                                        lookahead = LOOKAHEAD_BURST
+                                        burst_countdown -= 1
+                                        pacing_mode = "burst"
+                                    elif current_buffer < BUFFER_LOW_THRESHOLD:
+                                        # LOW BUFFER: Speed up
+                                        lookahead = LOOKAHEAD_BURST  # No sleep, catch up
+                                        pacing_mode = "catching_up"
+                                    elif current_buffer > BUFFER_HIGH_THRESHOLD:
+                                        # HIGH BUFFER: Can afford to slow down
+                                        lookahead = LOOKAHEAD_SLOW
+                                        pacing_mode = "relaxed"
+                                    else:
+                                        # NORMAL: Target buffer range
+                                        lookahead = LOOKAHEAD_NORMAL
+                                        pacing_mode = "normal"
+                                    # Apply pacing only if not in "asap" mode
                                     if getattr(websocket, "_pace", "asap") == "realtime":
+                                        t_now = time.perf_counter()
                                         target_next += chunk_secs
+                                        sleep_s = max(0.0, target_next - t_now - lookahead)
                                         if sleep_s > 0:
                                             await asyncio.sleep(sleep_s)
+                                        # Debug logging (can be removed in production)
+                                        gen_ms = int((t_gen_end - t_gen_start) * 1000)
+                                        send_ms = int((t_send_end - t_send_start) * 1000)
+                                        print(f"[RT] buffer:{current_buffer:.1f}s mode:{pacing_mode} gen:{gen_ms}ms send:{send_ms}ms sleep:{int(sleep_s*1000)}ms")
+                                    else:
+                                        # ASAP mode: don't sleep at all
+                                        pass
                             except asyncio.CancelledError:
                                 pass
+                            except Exception as e:
+                                print(f"[RT] generation error: {e}")
+                                import traceback
+                                traceback.print_exc()
                         websocket._rt_task = asyncio.create_task(_rt_loop())
                         continue  # skip the “bar-mode started” message below
                     )
                     await send_json({"type":"status", **res})  # {"ok": True}
                 else:
+                    # rt-mode: update knobs and style
                     websocket._rt_temp = float(msg.get("temperature", websocket._rt_temp))
                     websocket._rt_topk = int(msg.get("topk", websocket._rt_topk))
                     websocket._rt_guid = float(msg.get("guidance_weight", websocket._rt_guid))
+                    # NEW: Read frontend buffer level from update message
+                    if "frontend_buffer_seconds" in msg:
+                        try:
+                            websocket._frontend_buffer_seconds = float(msg["frontend_buffer_seconds"])
+                        except:
+                            pass
                     # NEW steering fields
                     if "mean" in msg and msg["mean"] is not None:
                         try: websocket._rt_mean = float(msg["mean"])
                     text_list = [s for s in (styles_str.split(",") if styles_str else []) if s.strip()]
                     text_w = [float(x) for x in style_weights_str.split(",")] if style_weights_str else []
+                    # Build style vector (this can take 50-200ms)
                     asset_manager.ensure_assets_loaded(get_mrt())
                     websocket._style_tgt = build_style_vector(
                         websocket._mrt,
                         mean_weight=float(websocket._rt_mean),
                         centroid_weights=websocket._rt_centroid_weights,
                     )
+                    # Signal to generation loop that update occurred (trigger burst mode)
+                    websocket._rt_update_received = True
                     # optionally allow live changes to ramp:
                     if "style_ramp_seconds" in msg:
                         try: websocket._style_ramp_s = float(msg["style_ramp_seconds"])
                         except: pass
                     await send_json({"type":"status","updated":"rt-knobs+style"})
+            elif mtype == "buffer_status":
+                # Frontend reporting its buffer level for adaptive pacing
+                if "frontend_buffer_seconds" in msg:
+                    try:
+                        websocket._frontend_buffer_seconds = float(msg["frontend_buffer_seconds"])
+                        # Optional: log for monitoring
+                        # print(f"[RT] frontend buffer: {websocket._frontend_buffer_seconds:.1f}s")
+                    except:
+                        pass
+                # No response needed, this is just status info
             elif mtype == "consume" and mode == "bar":
                 with jam_lock:
                     worker = jam_registry.get(msg.get("session_id"))

magentaRT_rt_tester.html CHANGED Viewed

@@ -353,6 +353,7 @@ function beginPlaybackFromPending() {
   let ws = null;
   let connected = false;
   let autoUpdateTimer = null;
   /**
    * Push a line into the log ring and schedule a single repaint via rAF.
@@ -391,7 +392,20 @@ function beginPlaybackFromPending() {
   function updateQueueUI() {
     const total = scheduled.reduce((acc, s) => acc + s.dur, 0);
-    queueEl.textContent = `${scheduled.length} buffers, ${total.toFixed(2)}s scheduled`;
   }
   function clearSchedule() {
@@ -452,7 +466,11 @@ async function scheduleWavBytes(arrayBuffer) {
   function sendUpdate() {
     if (!ws || ws.readyState !== 1) return;
-    const msg = { type: "update", ...currentParams() };
     ws.send(JSON.stringify(msg));
     log("→ update " + JSON.stringify(msg), "small");
   }
@@ -463,6 +481,30 @@ async function scheduleWavBytes(arrayBuffer) {
     autoUpdateTimer = setTimeout(sendUpdate, 150);
   }
   function linkRangeNumber(range, number, cb) {
     const sync = (fromRange) => {
       if (fromRange) number.value = range.value;
@@ -628,6 +670,9 @@ async function scheduleWavBytes(arrayBuffer) {
       ws.send(JSON.stringify(msg));
       log("→ start " + JSON.stringify(msg), "ok");
       nextTime = ctx.currentTime + 0.12;
     };
     ws.onmessage = async (ev) => {
@@ -682,6 +727,7 @@ async function scheduleWavBytes(arrayBuffer) {
       btnStop.disabled = true;
       setStatus("closed");
       log("connection closed", "warn");
     };
     ws.onerror = (e) => {
@@ -691,6 +737,7 @@ async function scheduleWavBytes(arrayBuffer) {
   function stop() {
     if (!connected) return;
     try {
       ws?.send(JSON.stringify({ type: "stop" }));
     } catch {}

   let ws = null;
   let connected = false;
   let autoUpdateTimer = null;
+  let bufferStatusInterval = null;  // NEW: For periodic buffer status reporting
   /**
    * Push a line into the log ring and schedule a single repaint via rAF.
   function updateQueueUI() {
     const total = scheduled.reduce((acc, s) => acc + s.dur, 0);
+    const bufferLevel = getBufferLevel();
+    const bufferStatus =
+      bufferLevel < 2.0 ? '🔴 CRITICAL' :
+      bufferLevel < 3.0 ? '🟡 LOW' :
+      bufferLevel < 5.0 ? '🟢 GOOD' :
+      '🔵 HIGH';
+    queueEl.textContent = `${scheduled.length} buffers, ${total.toFixed(2)}s scheduled | Buffer: ${bufferLevel.toFixed(2)}s ${bufferStatus}`;
+  }
+  function getBufferLevel() {
+    if (!ctx || !playing) return 0;
+    const currentTime = ctx.currentTime;
+    const bufferSeconds = Math.max(0, nextTime - currentTime);
+    return bufferSeconds;
   }
   function clearSchedule() {
   function sendUpdate() {
     if (!ws || ws.readyState !== 1) return;
+    const msg = {
+      type: "update",
+      ...currentParams(),
+      frontend_buffer_seconds: getBufferLevel()  // Include buffer level for adaptive pacing
+    };
     ws.send(JSON.stringify(msg));
     log("→ update " + JSON.stringify(msg), "small");
   }
     autoUpdateTimer = setTimeout(sendUpdate, 150);
   }
+  function startBufferStatusReporting() {
+    stopBufferStatusReporting();
+    bufferStatusInterval = setInterval(() => {
+      if (ws && ws.readyState === 1 && connected && playing) {
+        const frontend_buffer_seconds = getBufferLevel();
+        // Send status when buffer is low or periodically (every 2 seconds)
+        const now = Date.now();
+        if (frontend_buffer_seconds < 4.0 || now % 2000 < 500) {
+          ws.send(JSON.stringify({
+            type: 'buffer_status',
+            frontend_buffer_seconds
+          }));
+        }
+      }
+    }, 500);
+  }
+  function stopBufferStatusReporting() {
+    if (bufferStatusInterval !== null) {
+      clearInterval(bufferStatusInterval);
+      bufferStatusInterval = null;
+    }
+  }
   function linkRangeNumber(range, number, cb) {
     const sync = (fromRange) => {
       if (fromRange) number.value = range.value;
       ws.send(JSON.stringify(msg));
       log("→ start " + JSON.stringify(msg), "ok");
       nextTime = ctx.currentTime + 0.12;
+      // Start buffer status reporting for adaptive pacing
+      startBufferStatusReporting();
     };
     ws.onmessage = async (ev) => {
       btnStop.disabled = true;
       setStatus("closed");
       log("connection closed", "warn");
+      stopBufferStatusReporting();  // Stop buffer reporting
     };
     ws.onerror = (e) => {
   function stop() {
     if (!connected) return;
+    stopBufferStatusReporting();  // Stop buffer reporting
     try {
       ws?.send(JSON.stringify({ type: "stop" }));
     } catch {}