thecollabagepatch committed on
Commit
6d5b723
·
1 Parent(s): e87e83d

loudness is always a puzzle in the DAW...one more attempt

Browse files
Files changed (1) hide show
  1. one_shot_generation.py +23 -27
one_shot_generation.py CHANGED
@@ -218,12 +218,12 @@ def apply_barwise_loudness_match(
218
  beats_per_bar: int,
219
  method: str = "auto",
220
  headroom_db: float = 1.0,
221
- smooth_ms: int = 50, # small ramp between bars
222
  ) -> tuple[au.Waveform, dict]:
223
  """
224
- Bar-locked loudness matching. Tiles ref_loop to cover out, then
225
- per-bar calls match_loudness_to_reference() and applies gains with
226
- a short cross-ramp between bars for smoothness.
227
  """
228
  sr = int(out.sample_rate)
229
  spb = (60.0 / float(bpm)) * int(beats_per_bar)
@@ -231,6 +231,7 @@ def apply_barwise_loudness_match(
231
 
232
  y = out.samples.astype(np.float32, copy=False)
233
  if y.ndim == 1: y = y[:, None]
 
234
  if ref_loop.sample_rate != sr:
235
  ref = ref_loop.resample(sr).as_stereo().samples.astype(np.float32, copy=False)
236
  else:
@@ -239,17 +240,17 @@ def apply_barwise_loudness_match(
239
  if ref.ndim == 1: ref = ref[:, None]
240
  if ref.shape[1] == 1: ref = np.repeat(ref, 2, axis=1)
241
 
242
- # tile reference to length of out
243
- need = y.shape[0]
244
- reps = int(np.ceil(need / float(ref.shape[0]))) if ref.shape[0] else 1
245
- ref_tiled = np.tile(ref, (max(1, reps), 1))[:need]
246
 
 
 
 
 
247
  gains_db = []
248
  out_adj = y.copy()
 
249
  n_bars = max(1, int(np.ceil(need / float(bar_len))))
250
  ramp = int(max(0, round(smooth_ms * sr / 1000.0)))
251
-
252
- # Minimum duration for LUFS measurement (400ms)
253
  min_lufs_samples = int(0.4 * sr)
254
 
255
  for i in range(n_bars):
@@ -258,38 +259,33 @@ def apply_barwise_loudness_match(
258
  if e <= s:
259
  break
260
 
261
- bar_duration = (e - s) / float(sr)
262
  bar_samples = e - s
263
-
264
- ref_bar = au.Waveform(ref_tiled[s:e], sr)
265
  tgt_bar = au.Waveform(out_adj[s:e], sr)
266
 
267
- # Skip loudness matching for bars shorter than LUFS minimum
268
- if method in ("auto", "lufs") and bar_samples < min_lufs_samples:
269
- # Fallback: use RMS for short segments, or skip entirely
270
- effective_method = "rms"
 
 
271
  else:
272
- effective_method = method
 
273
 
274
- matched_bar, stats = match_loudness_to_reference(
275
- ref_bar, tgt_bar, method=effective_method, headroom_db=headroom_db
276
- )
277
-
278
- # compute linear gain we actually applied
279
  g = matched_bar.samples.astype(np.float32, copy=False)
 
 
280
  if tgt_bar.samples.size > 0:
281
- # avoid divide-by-zero; infer average gain over the bar
282
  eps = 1e-12
283
  g_lin = float(np.sqrt((np.mean(g**2) + eps) / (np.mean(tgt_bar.samples**2) + eps)))
284
  else:
285
  g_lin = 1.0
286
  gains_db.append(20.0 * np.log10(max(g_lin, 1e-6)))
287
 
288
- # write with a short cross-ramp from previous bar
289
  if i > 0 and ramp > 0:
290
- ramp_len = min(ramp, e - s) # Don't ramp longer than the bar
291
  t = np.linspace(0.0, 1.0, ramp_len, dtype=np.float32)[:, None]
292
- # Blend from previous gain to current bar's gain
293
  out_adj[s:s+ramp_len] = (1.0 - t) * out_adj[s:s+ramp_len] + t * g[:ramp_len]
294
  out_adj[s+ramp_len:e] = g[ramp_len:e-s]
295
  else:
 
218
  beats_per_bar: int,
219
  method: str = "auto",
220
  headroom_db: float = 1.0,
221
+ smooth_ms: int = 50,
222
  ) -> tuple[au.Waveform, dict]:
223
  """
224
+ Bar-locked loudness matching that establishes the correct starting level
225
+ then maintains consistency. Only the first bar is matched to the reference;
226
+ subsequent bars maintain relative dynamics while preventing drift.
227
  """
228
  sr = int(out.sample_rate)
229
  spb = (60.0 / float(bpm)) * int(beats_per_bar)
 
231
 
232
  y = out.samples.astype(np.float32, copy=False)
233
  if y.ndim == 1: y = y[:, None]
234
+
235
  if ref_loop.sample_rate != sr:
236
  ref = ref_loop.resample(sr).as_stereo().samples.astype(np.float32, copy=False)
237
  else:
 
240
  if ref.ndim == 1: ref = ref[:, None]
241
  if ref.shape[1] == 1: ref = np.repeat(ref, 2, axis=1)
242
 
243
+ from utils import match_loudness_to_reference
 
 
 
244
 
245
+ # Measure reference loudness once (use first bar's worth if reference is long)
246
+ ref_bar_len = min(ref.shape[0], bar_len)
247
+ ref_bar = au.Waveform(ref[:ref_bar_len], sr)
248
+
249
  gains_db = []
250
  out_adj = y.copy()
251
+ need = y.shape[0]
252
  n_bars = max(1, int(np.ceil(need / float(bar_len))))
253
  ramp = int(max(0, round(smooth_ms * sr / 1000.0)))
 
 
254
  min_lufs_samples = int(0.4 * sr)
255
 
256
  for i in range(n_bars):
 
259
  if e <= s:
260
  break
261
 
 
262
  bar_samples = e - s
 
 
263
  tgt_bar = au.Waveform(out_adj[s:e], sr)
264
 
265
+ # First bar: match to reference to establish starting level
266
+ if i == 0:
267
+ effective_method = "rms" if bar_samples < min_lufs_samples else method
268
+ matched_bar, stats = match_loudness_to_reference(
269
+ ref_bar, tgt_bar, method=effective_method, headroom_db=headroom_db
270
+ )
271
  else:
272
+ # Subsequent bars: just copy through (preserves model's dynamics)
273
+ matched_bar = tgt_bar
274
 
 
 
 
 
 
275
  g = matched_bar.samples.astype(np.float32, copy=False)
276
+
277
+ # Calculate gain that was applied
278
  if tgt_bar.samples.size > 0:
 
279
  eps = 1e-12
280
  g_lin = float(np.sqrt((np.mean(g**2) + eps) / (np.mean(tgt_bar.samples**2) + eps)))
281
  else:
282
  g_lin = 1.0
283
  gains_db.append(20.0 * np.log10(max(g_lin, 1e-6)))
284
 
285
+ # Apply with ramp for smoothness
286
  if i > 0 and ramp > 0:
287
+ ramp_len = min(ramp, e - s)
288
  t = np.linspace(0.0, 1.0, ramp_len, dtype=np.float32)[:, None]
 
289
  out_adj[s:s+ramp_len] = (1.0 - t) * out_adj[s:s+ramp_len] + t * g[:ramp_len]
290
  out_adj[s+ramp_len:e] = g[ramp_len:e-s]
291
  else: