Spaces:
Running
Running
| import torch | |
| from tqdm import tqdm | |
| from multiprocessing import Pool | |
| from mel_processing import spectrogram_torch, mel_spectrogram_torch | |
| from utils import load_wav_to_torch | |
class AudioProcessor:
    """Compute a spectrogram for a wav file and cache it next to the audio.

    Depending on ``use_mel_spec_posterior`` the spectrogram is produced by
    ``mel_spectrogram_torch`` (cached as ``*.mel.pt``) or by
    ``spectrogram_torch`` (cached as ``*.spec.pt``).
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        """Store the spectrogram settings.

        Args:
            max_wav_value: Divisor applied to the loaded samples.
            use_mel_spec_posterior: If true, use ``mel_spectrogram_torch``
                and the ``.mel.pt`` cache suffix; otherwise use
                ``spectrogram_torch`` and ``.spec.pt``.
            filter_length: Forwarded to the spectrogram function.
            n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
            sampling_rate: Forwarded to the spectrogram function.
            hop_length: Forwarded to the spectrogram function.
            win_length: Forwarded to the spectrogram function.
            mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
            mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
        """
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def _spec_cache_path(self, filename):
        """Return the path of the cached spectrogram for ``filename``."""
        spec_filename = filename.replace(".wav", ".spec.pt")
        if self.use_mel_spec_posterior:
            spec_filename = spec_filename.replace(".spec.pt", ".mel.pt")
        if spec_filename == filename:
            # Nothing was substituted (e.g. a ".WAV" or ".flac" path), so the
            # cache path would be the audio file itself and torch.save would
            # overwrite it. Append the suffix instead.
            suffix = ".mel.pt" if self.use_mel_spec_posterior else ".spec.pt"
            spec_filename = filename + suffix
        return spec_filename

    def _compute_spec(self, audio_norm):
        """Compute the spectrogram and drop its leading dimension."""
        if self.use_mel_spec_posterior:
            spec = mel_spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.n_mel_channels,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                self.mel_fmin,
                self.mel_fmax,
                center=False,
            )
        else:
            spec = spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                center=False,
            )
        return torch.squeeze(spec, 0)

    def process_audio(self, filename):
        """Load ``filename`` and return ``(spec, audio_norm)``.

        The cached spectrogram is loaded when it can be read. Otherwise the
        spectrogram is computed and written to the cache path.

        Args:
            filename: Path of the audio file to process.

        Returns:
            A tuple ``(spec, audio_norm)`` where ``audio_norm`` is the loaded
            audio divided by ``max_wav_value`` with a leading dimension added.
        """
        # The loader also returns the file's own sampling rate; it is not
        # checked against self.sampling_rate here.
        audio, _file_sampling_rate = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)

        spec_filename = self._spec_cache_path(filename)
        try:
            # NOTE: torch.load unpickles the file; only cache files written
            # by this code should live at these paths.
            spec = torch.load(spec_filename)
        except Exception:
            # Missing or unreadable cache: recompute and store it.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            spec = self._compute_spec(audio_norm)
            torch.save(spec, spec_filename)
        return spec, audio_norm
# Usage example: pre-compute and cache spectrograms for the training list.
processor = AudioProcessor(
    max_wav_value=32768.0,
    use_mel_spec_posterior=False,
    filter_length=2048,
    n_mel_channels=128,
    sampling_rate=44100,
    hop_length=512,
    win_length=2048,
    mel_fmin=0.0,
    # Previously the string "null" (presumably a JSON null carried over
    # verbatim); None is the Python equivalent. The value is only forwarded
    # to mel_spectrogram_torch, i.e. when use_mel_spec_posterior is true.
    mel_fmax=None,
)

# Number of worker processes. (The old comment said 4 while the code used
# 32; the value actually used by the code is kept.)
NUM_WORKERS = 32


def main():
    """Run ``processor.process_audio`` over every path in the train list."""
    # The list is assumed to be UTF-8; an explicit encoding avoids depending
    # on the platform's locale default.
    with open("filelists/train.list", "r", encoding="utf-8") as f:
        # The first "|"-separated field of each line is the audio path.
        # Strip it so a line without "|" does not keep its newline, and skip
        # blank lines.
        candidates = (line.split("|")[0].strip() for line in f)
        filepaths = [path for path in candidates if path]

    # Fan the files out over a process pool; results are discarded, the
    # useful effect is the cache file each call writes.
    with Pool(processes=NUM_WORKERS) as pool:
        with tqdm(total=len(filepaths)) as pbar:
            for _ in pool.imap_unordered(processor.process_audio, filepaths):
                pbar.update()


# Required for multiprocessing: under the "spawn" start method each worker
# re-imports this module, and must not start its own pool while doing so.
if __name__ == "__main__":
    main()