liumaolin
Add GPT-SoVITS training pipeline with audio slicing, transcription, and model training modules.
1b05d02
| import os | |
| import traceback | |
| from subprocess import Popen | |
| from time import sleep | |
| from loguru import logger | |
| from env import PROJECT_ROOT, PYTHON_EXECUTABLE | |
| from tasks.kill_process import kill_process | |
| from tasks.process import BaseProcess | |
| from tools import my_utils | |
class AudioSliceTask(BaseProcess):
    """Slice audio by running ``tools/slice_audio.py`` in worker subprocesses.

    One subprocess is launched when the input is a single file; otherwise
    ``n_process`` subprocesses are launched and each receives its part index
    and the total part count as the last two command-line arguments.

    Args:
        slice_inp_path: Input audio path; may be a file or a directory.
        slice_opt_root: Root output directory for the sliced audio. Any
            existing directory at this path is deleted before slicing.
        threshold: Volume below this value is treated as silence and is a
            candidate cut point.
        min_length: Minimum length of each segment; short leading segments
            are merged with following ones until this length is exceeded.
        min_interval: Minimum interval between cuts.
        hop_size: Step used to compute the volume curve; smaller values are
            more precise but cost more computation.
        max_sil_kept: Maximum amount of silence kept after cutting.
        max: Maximum value after normalization. The name shadows the builtin
            but is kept for backward compatibility with existing callers.
        alpha: Proportion of the normalized audio mixed into the output.
        n_process: Number of subprocesses used when the input is a directory.
    """

    def __init__(
        self,
        slice_inp_path,
        slice_opt_root,
        threshold=-34,
        min_length=4000,
        min_interval=300,
        hop_size=10,
        max_sil_kept=500,
        max=0.9,
        alpha=0.25,
        n_process=4,
    ):
        super().__init__()
        self.slice_inp_path = slice_inp_path
        self.slice_opt_root = slice_opt_root
        self.threshold = threshold
        self.min_length = min_length
        self.min_interval = min_interval
        self.hop_size = hop_size
        self.max_sil_kept = max_sil_kept
        self.max = max
        self.alpha = alpha
        self.n_process = n_process

    def run(self):
        """Launch the slicing subprocesses and wait until they finish or the task is stopped."""
        inp = my_utils.clean_path(self.slice_inp_path)
        opt_root = my_utils.clean_path(self.slice_opt_root)

        # Validate the input BEFORE touching the output directory, so that a
        # wrong input path does not wipe the results of an earlier run.
        if not os.path.exists(inp):
            logger.warning('输入路径不存在')
            return

        if os.path.exists(opt_root):
            import shutil
            shutil.rmtree(opt_root)

        # A single file cannot be split across workers.
        n_parts = 1 if os.path.isfile(inp) else self.n_process

        slice_audio_filepath = PROJECT_ROOT / 'tools' / 'slice_audio.py'
        ps_slice = []
        for i_part in range(n_parts):
            # Built with f-strings only: mixing f-string and %-formatting would
            # misinterpret any '%' in the script path. The script path is
            # quoted like the other path arguments so spaces do not split it.
            cmd = (
                f'PYTHONPATH=$(pwd) "{PYTHON_EXECUTABLE}" "{slice_audio_filepath}" '
                f'"{inp}" "{opt_root}" '
                f'{self.threshold} {self.min_length} {self.min_interval} '
                f'{self.hop_size} {self.max_sil_kept} {self.max} {self.alpha} '
                f'{i_part} {n_parts}'
            )
            logger.info(cmd)
            # NOTE(review): shell=True is kept because the command relies on
            # the shell for `PYTHONPATH=$(pwd)`; paths are only double-quoted,
            # so they must come from trusted configuration.
            p = Popen(cmd, shell=True)
            ps_slice.append(p)

        logger.info("切割执行中......")
        stopped_early = False
        while any(p.poll() is None for p in ps_slice):
            # Early termination; stopped() is presumably provided by
            # BaseProcess — confirm against that class.
            if self.stopped():
                logger.info("slice任务被提前终止")
                for p in ps_slice:
                    try:
                        kill_process(p.pid)
                    except Exception:
                        # Best effort: keep killing the remaining workers, but
                        # let KeyboardInterrupt/SystemExit propagate.
                        traceback.print_exc()
                stopped_early = True
                break
            sleep(1)

        # Log each worker's exit status (None if it has not exited yet).
        for p in ps_slice:
            logger.info(p.poll())
        # Only report completion when the workers were not killed early.
        if not stopped_early:
            logger.info("切割完成")
if __name__ == '__main__':
    # Manual smoke run: slice one sample recording with the default settings.

    # Input path for automatic slicing; may be a file or a directory.
    input_path = '/home/wjr/code/sealai/GPT-SoVITS/train_data/naxida.wav'
    # Root output directory for the sliced sub-audio files.
    output_root = 'data/naxida/slicer'

    slicer_options = dict(
        # Volume below this value is treated as silence (candidate cut point).
        threshold=-34,
        # Minimum length of each segment; if the first segment is too short it
        # keeps being merged with the following ones until it exceeds this.
        min_length=4000,
        # Minimum cut interval.
        min_interval=300,
        # How the volume curve is computed: smaller means higher precision and
        # more computation (higher precision does not mean better results).
        hop_size=10,
        # Maximum silence kept after cutting.
        max_sil_kept=500,
        # Maximum value after normalization.
        max=0.9,
        # Proportion of normalized audio mixed in.
        alpha=0.25,
        # Number of processes used for slicing.
        n_process=4,
    )

    task = AudioSliceTask(input_path, output_root, **slicer_options)
    task.start()
    # To try early termination, uncomment:
    # sleep(2)
    # task.stop()
    task.join()
    # logger.info(task.exitcode)