Spaces:

AIvry
/

MAPSS-measures

Running on Zero

App Files Files Community

MAPSS-measures / distortions.py

AIvry

Upload 11 files

b759ccc verified 2 months ago

raw

history blame contribute delete

13.6 kB

	"""
	Distortions banks for the PS and the PM computations.
	"""

	import librosa
	import numpy as np
	from numpy.fft import irfft, rfft, rfftfreq
	from scipy.signal import butter, filtfilt, lfilter

	from config import ENERGY_WIN_MS, EPS, SR


	def sig_stats(x):
	A_pk = max(np.max(np.abs(x)), EPS)
	A_rms = max(np.sqrt(np.mean(x**2)), EPS)
	A_95 = max(np.percentile(np.abs(x), 95), EPS)
	return A_pk, A_rms, A_95


	def frame_distortions(
	frame,
	sr,
	distortion_keys,
	notch_freqs=None,
	low_cutoffs=None,
	high_cutoffs=None,
	frame_start=0,
	):
	notch_freqs = [] if notch_freqs is None else notch_freqs
	low_cutoffs = [] if low_cutoffs is None else low_cutoffs
	high_cutoffs = [] if high_cutoffs is None else high_cutoffs
	distortions = {}

	A_pk, A_rms, A_95 = sig_stats(frame)
	frame_len = len(frame)
	X = rfft(frame)
	freqs = rfftfreq(frame_len, 1 / sr)
	t = np.arange(frame_len) / sr

	if ("notch" in distortion_keys) or distortion_keys == "all":
	bw = 60.0
	for f0 in notch_freqs:
	Y = X.copy()
	band = (freqs > f0 - bw) & (freqs < f0 + bw)
	Y[band] = 0
	distortions[f"Notch_{int(round(f0))}Hz"] = irfft(Y, n=len(frame))

	if ("comb" in distortion_keys) or distortion_keys == "all":
	for d_ms, decay in zip([2.5, 5, 7.5, 10, 12.5, 15], [0.4, 0.5, 0.6, 0.7, 0.9]):
	D = int(sr * d_ms / 1000)
	if D >= frame_len:
	continue
	out = frame.copy()
	out[:-D] += decay * frame[D:]
	distortions[f"Comb_{int(d_ms)}ms"] = out

	if ("tremolo" in distortion_keys) or distortion_keys == "all":
	depth = 1.0
	t_centre = (frame_start + 0.5 * len(frame)) / sr
	for r_hz in [1, 2, 4, 6]:
	mod = (1 - depth) + depth * 0.5 * (1 + np.sin(2 * np.pi * r_hz * t_centre))
	distortions[f"Tremolo_{r_hz}Hz"] = frame * mod

	if ("noise" in distortion_keys) or distortion_keys == "all":
	nyq = sr / 2
	low_norm = 20 / nyq
	high_freq = min(20_000, 0.45 * sr)
	high_norm = min(high_freq / nyq, 0.99)
	b_band, a_band = butter(5, [low_norm, high_norm], btype="band")

	def add_noise(sig, snr_db, color="white"):
	nl_target = 10 ** (snr_db / 10)
	n = np.random.randn(len(sig))
	if color == "pink":
	n = np.cumsum(n)
	n /= max(np.max(np.abs(n)), 1e-12)
	elif color == "brown":
	n = np.cumsum(np.cumsum(n))
	n /= max(np.max(np.abs(n)), 1e-12)
	n = lfilter(b_band, a_band, n)
	rms_sig = np.sqrt(np.mean(sig**2))
	rms_n = np.sqrt(np.mean(n**2)) + 1e-12
	noise_rms = rms_sig / np.sqrt(nl_target)
	noise_rms = max(noise_rms, rms_sig / np.sqrt(10 ** (15 / 10)))
	n *= noise_rms / rms_n
	return sig + n

	for snr in [-15, -10, -5, 0, 5, 10, 15, 20, 25]:
	for clr in ["white", "pink", "brown"]:
	if (snr in [-15, -10, -5]) and (clr == "white"):
	continue
	distortions[f"{clr.capitalize()}Noise_{snr}dB"] = add_noise(
	frame, snr, clr
	)

	if ("harmonic" in distortion_keys) or distortion_keys == "all":
	for f_h, rel_amp in zip([100, 500, 1000, 4000], [0.4, 0.6, 0.8, 1.0]):
	tone = (rel_amp * A_rms) * np.sin(2 * np.pi * f_h * t)
	distortions[f"Harmonic_{f_h}Hz"] = frame + tone

	if ("reverb" in distortion_keys) or distortion_keys == "all":
	for tail_ms, decay in zip([50, 100, 200, 400], [0.3, 0.5, 0.7, 0.9]):
	L = int(sr * tail_ms / 1000)
	if L >= frame_len:
	continue
	irv = np.exp(-np.linspace(0, 6, L)) * decay
	reverbed = np.convolve(frame, irv)[:frame_len]
	distortions[f"Reverb_{tail_ms}ms"] = reverbed

	if ("noisegate" in distortion_keys) or distortion_keys == "all":
	for pct in [0.05, 0.10, 0.20, 0.40]:
	thr = pct * A_95
	g = frame.copy()
	g[np.abs(g) < thr] = 0
	distortions[f"NoiseGate_{int(pct * 100)}pct"] = g

	if ("pitch_shift" in distortion_keys) or distortion_keys == "all":
	n_fft = min(2048, frame_len // 2)
	for shift in [-4, -2, 2, 4]:
	y = librosa.effects.pitch_shift(frame, sr=sr, n_steps=shift, n_fft=n_fft)
	distortions[f"PitchShift_{shift}st"] = y[:frame_len]

	if ("lowpass" in distortion_keys) or distortion_keys == "all":
	for fc in low_cutoffs:
	if fc >= sr / 2 * 0.99:
	continue
	b, a = butter(6, fc / (sr / 2), btype="low")
	distortions[f"Lowpass_{fc}Hz"] = filtfilt(b, a, frame)

	if ("highpass" in distortion_keys) or distortion_keys == "all":
	for fc in high_cutoffs:
	if fc <= 20:
	continue
	b, a = butter(6, fc / (sr / 2), btype="high")
	distortions[f"Highpass_{fc}Hz"] = filtfilt(b, a, frame)

	if ("echo" in distortion_keys) or distortion_keys == "all":
	for delay_ms, amp in zip([50, 100, 150], [0.4, 0.5, 0.7]):
	D = int(sr * delay_ms / 1000)
	if D >= frame_len:
	continue
	echo = np.pad(frame, (D, 0), "constant")[:-D] * amp
	distortions[f"Echo_{delay_ms}ms"] = frame + echo

	if ("clipping" in distortion_keys) or distortion_keys == "all":
	for frac in [0.70, 0.50, 0.30]:
	thr = frac * A_95
	distortions[f"Clipping_{frac:.2f}p95"] = np.clip(frame, -thr, thr)

	if ("vibrato" in distortion_keys) or distortion_keys == "all":
	n_fft = min(2048, frame_len // 2)
	base_depth = 0.03 * (A_rms / A_pk)
	for rate_hz, scale in zip([3, 5, 7], [1.0, 1.3, 1.6]):
	depth = np.clip(base_depth * scale, 0.01, 0.05)
	y = librosa.effects.time_stretch(frame, rate=1 + depth, n_fft=n_fft)
	distortions[f"Vibrato_{rate_hz}Hz"] = librosa.util.fix_length(
	y, size=frame_len
	)

	return distortions


	def apply_pm_distortions(ref, distortion_keys, sr=SR):
	frame_len = int(ENERGY_WIN_MS * sr / 1000)
	n_frames = int(np.ceil(len(ref) / frame_len))
	pad_len = n_frames * frame_len - len(ref)
	ref_padded = (
	np.concatenate([ref, np.zeros(pad_len, dtype=ref.dtype)]) if pad_len else ref
	)

	X_full = rfft(ref_padded)
	freqs_f = rfftfreq(len(ref_padded), 1 / sr)
	mag_full = np.abs(X_full)

	valid = (freqs_f > 80) & (freqs_f < 0.45 * sr)
	cand_indices = np.argsort(mag_full[valid])[-60:]
	cand_freqs = freqs_f[valid][cand_indices]
	cand_freqs = cand_freqs[np.argsort(mag_full[valid][cand_indices])[::-1]]

	selected_notch_freqs = []
	for f0 in cand_freqs:
	if all(abs(f0 - f_sel) > 300 for f_sel in selected_notch_freqs):
	selected_notch_freqs.append(float(f0))
	if len(selected_notch_freqs) >= 20:
	break

	mag2 = np.abs(X_full) ** 2
	total_p = mag2.sum()
	cum_low = np.cumsum(mag2)
	q_low = [0.50, 0.70, 0.85, 0.95]
	lowpass_cutoffs = []
	for q in q_low:
	idx = np.searchsorted(cum_low, q * total_p)
	f_c = float(freqs_f[idx])
	lowpass_cutoffs.append(round(f_c / 100.0) * 100)

	cum_high = np.cumsum(mag2[::-1])
	q_high = [0.05, 0.15, 0.30, 0.50]
	highpass_cutoffs = []
	for q in q_high:
	idx = np.searchsorted(cum_high, q * total_p)
	f_c = float(freqs_f[-1 - idx])
	highpass_cutoffs.append(round(f_c / 100.0) * 100)

	lowpass_cutoffs = sorted(set(lowpass_cutoffs))
	highpass_cutoffs = sorted(set(highpass_cutoffs))

	out = {}
	for f in range(n_frames):
	start, end = f * frame_len, (f + 1) * frame_len
	frame = ref_padded[start:end]
	frame_dists = frame_distortions(
	frame,
	sr,
	distortion_keys,
	notch_freqs=selected_notch_freqs,
	low_cutoffs=lowpass_cutoffs,
	high_cutoffs=highpass_cutoffs,
	frame_start=start,
	)
	for lbl, sig in frame_dists.items():
	if lbl not in out:
	out[lbl] = np.zeros_like(ref_padded)
	out[lbl][start:end] = sig

	return list(out.values())


	def apply_ps_distortions(ref, distortion_keys, sr=SR):
	distortions = {}
	X = rfft(ref)
	freqs = rfftfreq(len(ref), 1 / sr)
	t = np.arange(len(ref)) / sr

	if ("notch" in distortion_keys) or distortion_keys == "all":
	for c in [500, 1000, 2000, 4000, 8000]:
	Y = X.copy()
	Y[(freqs > c - 50) & (freqs < c + 50)] = 0
	distortions[f"Notch_{c}Hz"] = irfft(Y, n=len(ref))

	if ("comb" in distortion_keys) or distortion_keys == "all":
	for d, decay in zip([2.5, 5, 7.5, 10, 12.5, 15], [0.4, 0.5, 0.6, 0.7, 0.9]):
	D = int(sr * d / 1000)
	if D >= len(ref):
	continue
	cpy = ref.copy()
	if len(ref) > D:
	cpy[:-D] += decay * ref[D:]
	distortions[f"Comb_{int(d)}ms"] = cpy

	if ("tremolo" in distortion_keys) or distortion_keys == "all":
	for r, depth in zip([1, 2, 4, 6], [0.3, 0.5, 0.8, 1.0]):
	mod = (1 - depth) + depth * 0.5 * (1 + np.sin(2 * np.pi * r * t))
	distortions[f"Tremolo_{r}Hz"] = ref * mod

	if ("noise" in distortion_keys) or distortion_keys == "all":

	def add_noise(signal, snr_db, color):
	rms = np.sqrt(np.mean(signal**2))
	nl = 10 ** (snr_db / 10)
	noise_rms = rms / np.sqrt(nl)
	n = np.random.randn(len(signal))
	if color == "pink":
	n = np.cumsum(n)
	n /= max(np.max(np.abs(n)), 1e-12)
	elif color == "brown":
	n = np.cumsum(np.cumsum(n))
	n /= max(np.max(np.abs(n)), 1e-12)
	return signal + noise_rms * n

	for snr in [-15, -10, -5, 0, 5, 10, 15, 20, 25]:
	for clr in ["white", "pink", "brown"]:
	if snr in [-15, -10, -5] and clr in ["white"]:
	continue
	distortions[f"{clr.capitalize()}Noise_{snr}dB"] = add_noise(
	ref, snr, clr
	)

	if ("harmonic" in distortion_keys) or distortion_keys == "all":
	for f_h, amp in zip([100, 500, 1000, 4000], [0.02, 0.03, 0.05, 0.08]):
	tone = amp * np.sin(2 * np.pi * f_h * t)
	distortions[f"Harmonic_{f_h}Hz"] = ref + tone

	if ("reverb" in distortion_keys) or distortion_keys == "all":
	for tail_ms, decay in zip([5, 10, 15, 20], [0.3, 0.5, 0.7, 0.9, 1.1]):
	L = int(sr * tail_ms / 1000)
	if L >= len(ref):
	continue
	irv = np.exp(-np.linspace(0, 3, L)) * decay
	reverbed = np.convolve(ref, irv)[: len(ref)]
	distortions[f"Reverb_{tail_ms}ms"] = reverbed

	if ("noisegate" in distortion_keys) or distortion_keys == "all":
	for thr in [0.005, 0.01, 0.02, 0.04]:
	g = ref.copy()
	g[np.abs(g) < thr] = 0
	distortions[f"NoiseGate_{thr}"] = g

	if ("pitch_shift" in distortion_keys) or distortion_keys == "all":
	n_fft = min(2048, len(ref) // 2)
	for shift in [-4, -2, 2, 4]:
	shifted = librosa.effects.pitch_shift(
	y=ref, sr=sr, n_steps=shift, n_fft=n_fft
	)
	distortions[f"PitchShift_{shift}st"] = shifted[: len(ref)]

	if ("lowpass" in distortion_keys) or distortion_keys == "all":
	for freq in [2000, 3000, 4000, 6000]:
	if freq >= (sr / 2):
	continue
	b, a = butter(4, freq / (sr / 2), "low")
	distortions[f"Lowpass_{freq}Hz"] = filtfilt(b, a, ref)

	if ("highpass" in distortion_keys) or distortion_keys == "all":
	for freq in [100, 300, 500, 800]:
	if freq >= (sr / 2):
	continue
	b, a = butter(4, freq / (sr / 2), "high")
	distortions[f"Highpass_{freq}Hz"] = filtfilt(b, a, ref)

	if ("echo" in distortion_keys) or distortion_keys == "all":
	for delay_ms, amp in zip([5, 10, 15, 20], [0.3, 0.5, 0.7]):
	delay = int(sr * delay_ms / 1000)
	if delay >= len(ref):
	continue
	echo = np.pad(ref, (delay, 0), "constant")[:-delay] * amp
	distortions[f"Echo_{delay_ms}ms"] = ref + echo

	if ("clipping" in distortion_keys) or distortion_keys == "all":
	for thr in [0.3, 0.5, 0.7]:
	distortions[f"Clipping_{thr}"] = np.clip(ref, -thr, thr)

	if ("vibrato" in distortion_keys) or distortion_keys == "all":
	for rate, depth in zip([3, 5, 7], [0.001, 0.002, 0.003]):
	vibrato = np.sin(2 * np.pi * rate * t) * depth
	vibrato_signal = librosa.effects.time_stretch(
	ref, rate=1 + float(vibrato.mean()), n_fft=min(2048, len(ref) // 2)
	)
	distortions[f"Vibrato_{rate}Hz"] = librosa.util.fix_length(
	vibrato_signal, size=len(ref)
	)

	return list(distortions.values())