Spaces:
Running
on
T4
Running
on
T4
| import argparse | |
| from functools import partial | |
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| from resemble_enhance.enhancer.inference import denoise, enhance | |
# Run inference on the GPU when CUDA is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
def _fn(path, solver, nfe, tau, denoising, unlimited):
    """Denoise and enhance one uploaded audio file for the Gradio demo.

    Args:
        path: Filesystem path of the uploaded audio, or ``None`` when nothing
            was uploaded.
        solver: Name of the CFM ODE solver as shown in the UI; it is
            lower-cased before being passed to ``enhance``.
        nfe: Number of function evaluations; coerced to ``int``.
        tau: CFM prior temperature, forwarded to ``enhance`` unchanged.
        denoising: When truthy, ``enhance`` is called with ``lambd=0.9``,
            otherwise with ``lambd=0.1``.
        unlimited: When truthy, the 60-second duration limit is skipped.

    Returns:
        ``((denoised_sr, denoised), (enhanced_sr, enhanced))`` where each
        element is a ``(sample_rate, numpy_array)`` tuple, or
        ``(None, None)`` when the request is rejected (a Gradio warning is
        raised in that case).
    """
    if path is None:
        gr.Warning("Please upload an audio file.")
        return None, None

    # Check the duration from the file metadata before loading any samples.
    info = torchaudio.info(path)
    if not unlimited and (info.num_frames / info.sample_rate > 60):
        gr.Warning("Only audio files shorter than 60 seconds are supported.")
        return None, None

    solver = solver.lower()
    nfe = int(nfe)
    lambd = 0.9 if denoising else 0.1

    dwav, sr = torchaudio.load(path)
    # Average over dim 0 to obtain a single-channel waveform
    # (torchaudio.load is channels-first by default).
    dwav = dwav.mean(dim=0)

    # Keep each call's returned sample rate with its own waveform instead of
    # letting the second call overwrite the first one's rate.
    denoised, denoised_sr = denoise(dwav, sr, device)
    enhanced, enhanced_sr = enhance(
        dwav, sr, device, nfe=nfe, solver=solver, lambd=lambd, tau=tau
    )

    denoised = denoised.cpu().numpy()
    enhanced = enhanced.cpu().numpy()

    return (denoised_sr, denoised), (enhanced_sr, enhanced)
def main():
    """Parse command-line flags, build the Gradio interface and launch it.

    The ``--unlimited`` flag is bound into ``_fn`` via ``partial`` so the
    interface callback only receives the five UI inputs.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--unlimited", action="store_true")
    options = cli.parse_args()

    # Input widgets, created in the positional order that _fn expects.
    audio_input = gr.Audio(type="filepath", label="Input Audio")
    solver_dropdown = gr.Dropdown(
        choices=["Midpoint", "RK4", "Euler"],
        value="Midpoint",
        label="CFM ODE Solver (Midpoint is recommended)",
    )
    nfe_slider = gr.Slider(
        minimum=1,
        maximum=128,
        value=64,
        step=1,
        label="CFM Number of Function Evaluations (higher values in general yield better quality but may be slower)",
    )
    tau_slider = gr.Slider(
        minimum=0,
        maximum=1,
        value=0.5,
        step=0.01,
        label="CFM Prior Temperature (higher values can improve quality but can reduce stability)",
    )
    denoise_checkbox = gr.Checkbox(
        value=False,
        label="Denoise Before Enhancement (tick if your audio contains heavy background noise)",
    )

    # Output widgets: denoised audio first, enhanced audio second.
    denoised_output = gr.Audio(label="Output Denoised Audio")
    enhanced_output = gr.Audio(label="Output Enhanced Audio")

    callback = partial(_fn, unlimited=options.unlimited)
    demo = gr.Interface(
        fn=callback,
        title="Resemble Enhance",
        description="AI-driven audio enhancement for your audio files, powered by Resemble AI.",
        inputs=[
            audio_input,
            solver_dropdown,
            nfe_slider,
            tau_slider,
            denoise_checkbox,
        ],
        outputs=[denoised_output, enhanced_output],
    )
    demo.launch()
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()