Update main.py
main.py (CHANGED)
@@ -8,7 +8,6 @@ import random
 import numpy as np
 from scipy.signal.windows import hann
 import soundfile as sf
-import torch
 import librosa
 from audiosr import build_model, super_resolution
 from scipy import signal
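Review note: this hunk drops `import torch`, but `torch.cuda.empty_cache()` is still called inside `upscale_audio` (see the final hunk below). Unless `torch` is imported somewhere not shown in this diff, that call will now raise a `NameError`.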
@@ -16,25 +15,27 @@ import pyloudnorm as pyln
 import tempfile
 import spaces
 
+
 class AudioUpscaler:
     """
     Upscales audio using the AudioSR model.
     """
 
-    def __init__(self, model_name="basic", device="
+    def __init__(self, model_name="basic", device="cuda"):
         """
         Initializes the AudioUpscaler.
 
         Args:
             model_name (str, optional): Name of the AudioSR model to use. Defaults to "basic".
-            device (str, optional): Device to use for inference. Defaults to "
+            device (str, optional): Device to use for inference. Defaults to "cuda".
         """
 
         self.model_name = model_name
         self.device = device
-        self.sr =
+        self.sr = 44100
         self.audiosr = None  # Model will be loaded in setup()
-
+
+    @spaces.GPU(duration=120)
     def setup(self):
         """
         Loads the AudioSR model.
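The new `@spaces.GPU(duration=120)` line uses the Hugging Face ZeroGPU decorator from the `spaces` package imported above: a GPU is attached for the duration of the decorated call, up to the given number of seconds. Together with the removal of `@spaces.GPU(duration=300)` from `upscale_audio` in a later hunk, this moves GPU allocation from the whole upscaling call onto model loading. A minimal sketch of the pattern (the function below is illustrative, not from the file):

    import spaces  # Hugging Face ZeroGPU helper; available on Spaces

    @spaces.GPU(duration=120)  # a GPU is attached only while this call runs
    def setup_model():
        # Heavy CUDA work (model loading, warm-up) belongs inside the
        # decorated function; on ZeroGPU no device is guaranteed outside it.
        ...

On ZeroGPU the device is only guaranteed inside decorated calls, so if inference itself also needs CUDA it may need its own decorated entry point; whether that holds for this Space depends on code not shown in the diff.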
@@ -107,12 +108,12 @@
         self,
         input_file,
         chunk_size=5.12,
-        overlap=0.
+        overlap=0.16,
         seed=None,
         guidance_scale=3.5,
         ddim_steps=50,
         multiband_ensemble=True,
-        input_cutoff=
+        input_cutoff=8000,
     ):
         """
         Processes the audio in chunks and performs upsampling.
@@ -130,7 +131,7 @@
         Returns:
             np.ndarray: Upsampled audio data.
         """
-
+        chunk_size = random.randint(a=0, b=10)*0.08
         audio, sr = librosa.load(input_file, sr=input_cutoff * 2, mono=False)
         audio = audio.T
         sr = input_cutoff * 2
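`librosa.load(input_file, sr=input_cutoff * 2, mono=False)` resamples the input so its Nyquist frequency equals `input_cutoff` (8000 Hz by the new default, i.e. a 16 kHz working rate), and the transpose puts the array in `(samples, channels)` order. Note also that the added line overwrites the `chunk_size` argument with `random.randint(a=0, b=10)*0.08`, a random multiple of 0.08 s between 0.0 and 0.8 s inclusive; since `randint` includes both endpoints, a draw of 0 is possible (see the note after the last hunk). A small self-contained illustration of the loading step, with a hypothetical file name:

    import numpy as np
    import librosa

    input_cutoff = 8000      # Hz; the new default in this commit
    path = "example.wav"     # hypothetical input file

    # Resample to twice the cutoff so the Nyquist frequency equals
    # input_cutoff: the low-resolution input has no content above it anyway.
    audio, sr = librosa.load(path, sr=input_cutoff * 2, mono=False)
    audio = np.atleast_2d(audio).T   # (n_samples, n_channels), mono included
    print(sr, audio.shape)           # 16000, e.g. (160000, 2)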
@@ -141,12 +142,13 @@
         else:
             audio_ch1 = audio
 
-        chunk_samples
+        chunk_samples = int(chunk_size * sr)
         overlap_samples = int(overlap * chunk_samples)
 
-
-
-
+
+        output_chunk_samples = int(chunk_size * self.sr)
+        output_overlap_samples = int(overlap * output_chunk_samples)
+        enable_overlap = True if overlap > 0 else False
 
         def process_chunks(audio):
             chunks = []
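The restored lines size the analysis windows at the input rate `sr` and the corresponding output windows at the model rate `self.sr` (44100), and overlap is enabled whenever `overlap > 0` (0.16, i.e. 16% of a chunk, by the new default). The body of `process_chunks` is not shown in this diff; given the `hann` import at the top of the file, the usual way to stitch such chunks back together is overlap-add with a windowed crossfade. The sketch below is a generic version of that technique under those assumptions, not the file's actual code:

    import numpy as np
    from scipy.signal.windows import hann

    def overlap_add(chunks, chunk_samples, overlap_samples):
        """Stitch equal-length chunks, crossfading each overlap region."""
        hop = chunk_samples - overlap_samples
        out = np.zeros(hop * (len(chunks) - 1) + chunk_samples)
        # A Hann window of twice the overlap length supplies matching
        # fade-in / fade-out ramps for the crossfade.
        ramp = hann(2 * overlap_samples)
        fade_in, fade_out = ramp[:overlap_samples], ramp[overlap_samples:]
        for i, chunk in enumerate(chunks):
            chunk = np.asarray(chunk, dtype=float).copy()
            if overlap_samples:
                if i > 0:
                    chunk[:overlap_samples] *= fade_in
                if i < len(chunks) - 1:
                    chunk[-overlap_samples:] *= fade_out
            start = i * hop
            out[start:start + chunk_samples] += chunk
        return out

    # Example: three 1.0 s chunks at 44100 Hz with 16% overlap.
    sr = 44100
    chunks = [np.ones(sr) for _ in range(3)]
    print(overlap_add(chunks, sr, int(0.16 * sr)).shape)

Each upsampled chunk would be `output_chunk_samples` long, so the same routine applies on the output side with `output_overlap_samples`.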
@@ -320,7 +322,7 @@
         chunk_size=10.24,
         seed=None,
         multiband_ensemble=True,
-        input_cutoff=
+        input_cutoff=8000,
     ):
         """
         Upscales the audio and saves the result.
@@ -338,6 +340,7 @@
         """
         if seed == 0:
             seed = random.randint(0, 2**32 - 1)
+        chunk_size = random.randint(0, 10) * 0.08
 
         os.makedirs(output_folder, exist_ok=True)
         waveform = self._process_audio(
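Here `seed == 0` is treated as "randomize": a fresh 32-bit seed is drawn so a nonzero value can still reproduce a run. The added line also applies the same randomized `chunk_size` override as in `_process_audio`, so the `chunk_size=10.24` default above is effectively ignored. A small illustration of the zero-means-random convention:

    import random

    def resolve_seed(seed: int) -> int:
        # 0 is the sentinel for "pick a random seed"; any other value is
        # used as-is so runs can be reproduced.
        if seed == 0:
            seed = random.randint(0, 2**32 - 1)
        return seed

    print(resolve_seed(0))     # fresh seed each call
    print(resolve_seed(1234))  # always 1234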
@@ -385,7 +388,6 @@ def inference(audio_file, model_name, guidance_scale, ddim_steps, seed):
 
     return (48000, waveform)
 
-@spaces.GPU(duration=300)
 def upscale_audio(
     input_file,
     output_folder,
@@ -415,6 +417,7 @@ def upscale_audio(
         tuple: Upscaled audio data and sample rate.
     """
     torch.cuda.empty_cache()
+    chunk_size = random.randint(a=0, b=10)*0.08
 
     gc.collect()
     upscaler = AudioUpscaler()
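All three call sites now override `chunk_size` with `random.randint(a=0, b=10) * 0.08`. Since `randint` is inclusive on both ends, the draw can return 0, giving `chunk_size = 0.0` and therefore `chunk_samples = int(0.0 * sr) = 0` downstream. If the zero case is unintended, a guarded draw is a one-line fix; this is a suggestion, not part of the commit:

    import random

    # Draw a chunk size in 0.08 s steps, excluding the degenerate 0.0 case.
    chunk_size = random.randint(1, 10) * 0.08  # 0.08 .. 0.80 seconds
    assert chunk_size > 0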