Commit
·
78cac08
1
Parent(s):
dfa1fc4
loudness matching improvement for single-shot generations in DAW
Browse files- one_shot_generation.py +89 -8
one_shot_generation.py
CHANGED
|
@@ -112,17 +112,21 @@ def generate_loop_continuation_with_mrt(
|
|
| 112 |
# Final exact-length trim to requested bars
|
| 113 |
out = hard_trim_seconds(stitched, total_secs)
|
| 114 |
|
| 115 |
-
#
|
| 116 |
-
out = out.peak_normalize(0.95)
|
| 117 |
apply_micro_fades(out, 5)
|
| 118 |
|
| 119 |
-
#
|
| 120 |
-
out, loud_stats =
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
)
|
| 124 |
|
| 125 |
-
|
|
|
|
| 126 |
|
| 127 |
|
| 128 |
def generate_style_only_with_mrt(
|
|
@@ -193,4 +197,81 @@ def generate_style_only_with_mrt(
|
|
| 193 |
out = out.peak_normalize(0.95)
|
| 194 |
apply_micro_fades(out, 5)
|
| 195 |
|
| 196 |
-
return out, None # loudness stats not applicable (no reference)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# Final exact-length trim to requested bars
|
| 113 |
out = hard_trim_seconds(stitched, total_secs)
|
| 114 |
|
| 115 |
+
# (optional) keep micro fades
|
|
|
|
| 116 |
apply_micro_fades(out, 5)
|
| 117 |
|
| 118 |
+
# Bar-wise loudness match so bar 1 sits right even if the model ramps up
|
| 119 |
+
out, loud_stats = apply_barwise_loudness_match(
|
| 120 |
+
out,
|
| 121 |
+
ref_loop=loop, # same source the jam path tiles per chunk
|
| 122 |
+
bpm=bpm,
|
| 123 |
+
beats_per_bar=beats_per_bar,
|
| 124 |
+
method=loudness_mode,
|
| 125 |
+
headroom_db=loudness_headroom_db,
|
| 126 |
)
|
| 127 |
|
| 128 |
+
# Optionally finish with a light peak cap at 0.95 linear, i.e. about -0.45 dBFS (no re-scaling)
|
| 129 |
+
out = out.peak_normalize(0.95)
|
| 130 |
|
| 131 |
|
| 132 |
def generate_style_only_with_mrt(
|
|
|
|
| 197 |
out = out.peak_normalize(0.95)
|
| 198 |
apply_micro_fades(out, 5)
|
| 199 |
|
| 200 |
+
return out, None # loudness stats not applicable (no reference)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
# loudness matching helper for /generate:
|
| 204 |
+
|
| 205 |
+
def apply_barwise_loudness_match(
    out: au.Waveform,
    ref_loop: au.Waveform,
    *,
    bpm: float,
    beats_per_bar: int,
    method: str = "auto",
    headroom_db: float = 1.0,
    smooth_ms: int = 50,  # small ramp between bars
) -> tuple[au.Waveform, dict]:
    """
    Bar-locked loudness matching.

    Tiles ``ref_loop`` to cover ``out``, calls
    ``match_loudness_to_reference()`` once per bar, and writes the matched
    bars back. At every bar boundary after the first, the first
    ``smooth_ms`` milliseconds are cross-ramped from the previous bar's
    average gain to the current bar's matched audio so the level does not
    step at the bar line.

    Args:
        out: Generated audio to adjust. Its ``samples`` attribute is
            replaced in place with the adjusted float32 array.
        ref_loop: Reference loop; resampled to ``out``'s sample rate when
            the rates differ, converted with ``as_stereo()``, and tiled.
        bpm: Tempo used to derive the bar length. Must be positive.
        beats_per_bar: Beats per bar used to derive the bar length. Must be
            positive.
        method: Forwarded unchanged to ``match_loudness_to_reference``.
        headroom_db: Forwarded unchanged to ``match_loudness_to_reference``.
        smooth_ms: Length of the cross-ramp at each bar boundary, in
            milliseconds. ``0`` disables the ramp.

    Returns:
        ``(out, stats)`` where ``stats["per_bar_gain_db"]`` lists, per bar,
        the RMS-derived average gain in dB between the bar before and after
        matching.

    Raises:
        ValueError: If ``bpm`` or ``beats_per_bar`` is not positive.
    """
    if float(bpm) <= 0.0 or int(beats_per_bar) <= 0:
        raise ValueError(
            f"bpm and beats_per_bar must be positive (got bpm={bpm!r}, "
            f"beats_per_bar={beats_per_bar!r})"
        )

    sr = int(out.sample_rate)
    seconds_per_bar = (60.0 / float(bpm)) * int(beats_per_bar)
    # Clamp to one sample so a degenerate tempo/sample-rate pair cannot
    # produce a zero-length bar (which would divide by zero below).
    bar_len = max(1, int(round(seconds_per_bar * sr)))

    y = out.samples.astype(np.float32, copy=False)
    if y.ndim == 1:
        y = y[:, None]

    if ref_loop.sample_rate != sr:
        ref_wave = ref_loop.resample(sr).as_stereo()
    else:
        ref_wave = ref_loop.as_stereo()
    ref = ref_wave.samples.astype(np.float32, copy=False)
    if ref.ndim == 1:
        ref = ref[:, None]
    if ref.shape[1] == 1:
        ref = np.repeat(ref, 2, axis=1)

    need = y.shape[0]
    if need == 0 or ref.shape[0] == 0:
        # Nothing to match (or nothing to match against): leave audio as is.
        return out, {"per_bar_gain_db": []}

    # Tile the reference so every bar of `out` has a same-position reference.
    reps = max(1, int(np.ceil(need / float(ref.shape[0]))))
    ref_tiled = np.tile(ref, (reps, 1))[:need]

    # Local import of the project's loudness helper from the sibling
    # `utils` module.
    from .utils import match_loudness_to_reference

    gains_db = []
    out_adj = y.copy()
    ramp = int(max(0, round(smooth_ms * sr / 1000.0)))
    eps = 1e-12  # keeps the RMS ratio finite on silent bars
    prev_gain = 1.0

    for bar_index, s in enumerate(range(0, need, bar_len)):
        e = min(need, s + bar_len)

        dry = out_adj[s:e]  # this bar has not been written yet
        ref_bar = au.Waveform(ref_tiled[s:e], sr)
        tgt_bar = au.Waveform(dry, sr)

        matched_bar, _ = match_loudness_to_reference(
            ref_bar, tgt_bar, method=method, headroom_db=headroom_db
        )
        g = matched_bar.samples.astype(np.float32, copy=False)
        if g.ndim == 1:
            g = g[:, None]

        # Infer the average linear gain that was applied over this bar.
        g_lin = float(
            np.sqrt((np.mean(g ** 2) + eps) / (np.mean(dry ** 2) + eps))
        )
        gains_db.append(float(20.0 * np.log10(max(g_lin, 1e-6))))

        # Cross-ramp from the previous bar's gain into this bar's matched
        # audio; the ramp is clamped when the (last) bar is shorter than it.
        fade = min(ramp, e - s) if bar_index > 0 else 0
        if fade > 0:
            t = np.linspace(0.0, 1.0, fade, dtype=np.float32)[:, None]
            head = (1.0 - t) * (np.float32(prev_gain) * dry[:fade]) + t * g[:fade]
            out_adj[s:s + fade] = head
            out_adj[s + fade:e] = g[fade:]
        else:
            out_adj[s:e] = g

        prev_gain = g_lin

    out.samples = out_adj.astype(np.float32, copy=False)
    return out, {"per_bar_gain_db": gains_db}
|