Commit
·
6d5b723
1
Parent(s):
e87e83d
Loudness is always a puzzle in the DAW... one more attempt: match only the first bar to the reference loop and pass later bars through unchanged.
Browse files
- one_shot_generation.py: +23 -27
one_shot_generation.py
CHANGED
|
@@ -218,12 +218,12 @@ def apply_barwise_loudness_match(
|
|
| 218 |
beats_per_bar: int,
|
| 219 |
method: str = "auto",
|
| 220 |
headroom_db: float = 1.0,
|
| 221 |
-
smooth_ms: int = 50,
|
| 222 |
) -> tuple[au.Waveform, dict]:
|
| 223 |
"""
|
| 224 |
-
Bar-locked loudness matching
|
| 225 |
-
|
| 226 |
-
|
| 227 |
"""
|
| 228 |
sr = int(out.sample_rate)
|
| 229 |
spb = (60.0 / float(bpm)) * int(beats_per_bar)
|
|
@@ -231,6 +231,7 @@ def apply_barwise_loudness_match(
|
|
| 231 |
|
| 232 |
y = out.samples.astype(np.float32, copy=False)
|
| 233 |
if y.ndim == 1: y = y[:, None]
|
|
|
|
| 234 |
if ref_loop.sample_rate != sr:
|
| 235 |
ref = ref_loop.resample(sr).as_stereo().samples.astype(np.float32, copy=False)
|
| 236 |
else:
|
|
@@ -239,17 +240,17 @@ def apply_barwise_loudness_match(
|
|
| 239 |
if ref.ndim == 1: ref = ref[:, None]
|
| 240 |
if ref.shape[1] == 1: ref = np.repeat(ref, 2, axis=1)
|
| 241 |
|
| 242 |
-
|
| 243 |
-
need = y.shape[0]
|
| 244 |
-
reps = int(np.ceil(need / float(ref.shape[0]))) if ref.shape[0] else 1
|
| 245 |
-
ref_tiled = np.tile(ref, (max(1, reps), 1))[:need]
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
gains_db = []
|
| 248 |
out_adj = y.copy()
|
|
|
|
| 249 |
n_bars = max(1, int(np.ceil(need / float(bar_len))))
|
| 250 |
ramp = int(max(0, round(smooth_ms * sr / 1000.0)))
|
| 251 |
-
|
| 252 |
-
# Minimum duration for LUFS measurement (400ms)
|
| 253 |
min_lufs_samples = int(0.4 * sr)
|
| 254 |
|
| 255 |
for i in range(n_bars):
|
|
@@ -258,38 +259,33 @@ def apply_barwise_loudness_match(
|
|
| 258 |
if e <= s:
|
| 259 |
break
|
| 260 |
|
| 261 |
-
bar_duration = (e - s) / float(sr)
|
| 262 |
bar_samples = e - s
|
| 263 |
-
|
| 264 |
-
ref_bar = au.Waveform(ref_tiled[s:e], sr)
|
| 265 |
tgt_bar = au.Waveform(out_adj[s:e], sr)
|
| 266 |
|
| 267 |
-
#
|
| 268 |
-
if
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
| 271 |
else:
|
| 272 |
-
|
|
|
|
| 273 |
|
| 274 |
-
matched_bar, stats = match_loudness_to_reference(
|
| 275 |
-
ref_bar, tgt_bar, method=effective_method, headroom_db=headroom_db
|
| 276 |
-
)
|
| 277 |
-
|
| 278 |
-
# compute linear gain we actually applied
|
| 279 |
g = matched_bar.samples.astype(np.float32, copy=False)
|
|
|
|
|
|
|
| 280 |
if tgt_bar.samples.size > 0:
|
| 281 |
-
# avoid divide-by-zero; infer average gain over the bar
|
| 282 |
eps = 1e-12
|
| 283 |
g_lin = float(np.sqrt((np.mean(g**2) + eps) / (np.mean(tgt_bar.samples**2) + eps)))
|
| 284 |
else:
|
| 285 |
g_lin = 1.0
|
| 286 |
gains_db.append(20.0 * np.log10(max(g_lin, 1e-6)))
|
| 287 |
|
| 288 |
-
#
|
| 289 |
if i > 0 and ramp > 0:
|
| 290 |
-
ramp_len = min(ramp, e - s)
|
| 291 |
t = np.linspace(0.0, 1.0, ramp_len, dtype=np.float32)[:, None]
|
| 292 |
-
# Blend from previous gain to current bar's gain
|
| 293 |
out_adj[s:s+ramp_len] = (1.0 - t) * out_adj[s:s+ramp_len] + t * g[:ramp_len]
|
| 294 |
out_adj[s+ramp_len:e] = g[ramp_len:e-s]
|
| 295 |
else:
|
|
|
|
| 218 |
beats_per_bar: int,
|
| 219 |
method: str = "auto",
|
| 220 |
headroom_db: float = 1.0,
|
| 221 |
+
smooth_ms: int = 50,
|
| 222 |
) -> tuple[au.Waveform, dict]:
|
| 223 |
"""
|
| 224 |
+
Bar-locked loudness matching that establishes the correct starting level
|
| 225 |
+
then maintains consistency. Only the first bar is matched to the reference;
|
| 226 |
+
subsequent bars maintain relative dynamics while preventing drift.
|
| 227 |
"""
|
| 228 |
sr = int(out.sample_rate)
|
| 229 |
spb = (60.0 / float(bpm)) * int(beats_per_bar)
|
|
|
|
| 231 |
|
| 232 |
y = out.samples.astype(np.float32, copy=False)
|
| 233 |
if y.ndim == 1: y = y[:, None]
|
| 234 |
+
|
| 235 |
if ref_loop.sample_rate != sr:
|
| 236 |
ref = ref_loop.resample(sr).as_stereo().samples.astype(np.float32, copy=False)
|
| 237 |
else:
|
|
|
|
| 240 |
if ref.ndim == 1: ref = ref[:, None]
|
| 241 |
if ref.shape[1] == 1: ref = np.repeat(ref, 2, axis=1)
|
| 242 |
|
| 243 |
+
from utils import match_loudness_to_reference
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
+
# Measure reference loudness once (use first bar's worth if reference is long)
|
| 246 |
+
ref_bar_len = min(ref.shape[0], bar_len)
|
| 247 |
+
ref_bar = au.Waveform(ref[:ref_bar_len], sr)
|
| 248 |
+
|
| 249 |
gains_db = []
|
| 250 |
out_adj = y.copy()
|
| 251 |
+
need = y.shape[0]
|
| 252 |
n_bars = max(1, int(np.ceil(need / float(bar_len))))
|
| 253 |
ramp = int(max(0, round(smooth_ms * sr / 1000.0)))
|
|
|
|
|
|
|
| 254 |
min_lufs_samples = int(0.4 * sr)
|
| 255 |
|
| 256 |
for i in range(n_bars):
|
|
|
|
| 259 |
if e <= s:
|
| 260 |
break
|
| 261 |
|
|
|
|
| 262 |
bar_samples = e - s
|
|
|
|
|
|
|
| 263 |
tgt_bar = au.Waveform(out_adj[s:e], sr)
|
| 264 |
|
| 265 |
+
# First bar: match to reference to establish starting level
|
| 266 |
+
if i == 0:
|
| 267 |
+
effective_method = "rms" if bar_samples < min_lufs_samples else method
|
| 268 |
+
matched_bar, stats = match_loudness_to_reference(
|
| 269 |
+
ref_bar, tgt_bar, method=effective_method, headroom_db=headroom_db
|
| 270 |
+
)
|
| 271 |
else:
|
| 272 |
+
# Subsequent bars: just copy through (preserves model's dynamics)
|
| 273 |
+
matched_bar = tgt_bar
|
| 274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
g = matched_bar.samples.astype(np.float32, copy=False)
|
| 276 |
+
|
| 277 |
+
# Calculate gain that was applied
|
| 278 |
if tgt_bar.samples.size > 0:
|
|
|
|
| 279 |
eps = 1e-12
|
| 280 |
g_lin = float(np.sqrt((np.mean(g**2) + eps) / (np.mean(tgt_bar.samples**2) + eps)))
|
| 281 |
else:
|
| 282 |
g_lin = 1.0
|
| 283 |
gains_db.append(20.0 * np.log10(max(g_lin, 1e-6)))
|
| 284 |
|
| 285 |
+
# Apply with ramp for smoothness
|
| 286 |
if i > 0 and ramp > 0:
|
| 287 |
+
ramp_len = min(ramp, e - s)
|
| 288 |
t = np.linspace(0.0, 1.0, ramp_len, dtype=np.float32)[:, None]
|
|
|
|
| 289 |
out_adj[s:s+ramp_len] = (1.0 - t) * out_adj[s:s+ramp_len] + t * g[:ramp_len]
|
| 290 |
out_adj[s+ramp_len:e] = g[ramp_len:e-s]
|
| 291 |
else:
|