MoYoYo.tts / tasks /task_slice_data.py
liumaolin
Add GPT-SoVITS training pipeline with audio slicing, transcription, and model training modules.
1b05d02
import os
import traceback
from subprocess import Popen
from time import sleep
from loguru import logger
from env import PROJECT_ROOT, PYTHON_EXECUTABLE
from tasks.kill_process import kill_process
from tasks.process import BaseProcess
from tools import my_utils
class AudioSliceTask(BaseProcess):
    """Slice input audio by running ``tools/slice_audio.py`` in worker processes.

    One worker is launched when the input path is a single file; otherwise
    ``n_process`` workers are launched and each one is told its part index and
    the total number of parts. The task polls the workers once per second and
    kills the ones still running if ``self.stopped()`` becomes true.

    NOTE(review): ``stopped()`` is assumed to be provided by ``BaseProcess``;
    confirm against that class.

    Args:
        slice_inp_path: Audio input path; may be a file or a directory.
        slice_opt_root: Root directory for the sliced output. It is deleted
            and recreated by the workers on every run that has a valid input.
        threshold: Volume below this value is treated as silence and is a
            candidate cut point.
        min_length: Minimum length of each segment; a too-short segment is
            merged with the following ones until it exceeds this value.
        min_interval: Shortest interval between cuts.
        hop_size: Step used to compute the volume curve; smaller values give
            finer precision at a higher computation cost.
        max_sil_kept: Maximum amount of silence kept after cutting.
        max: Maximum value after normalization.
        alpha: Proportion of the normalized audio that is mixed in.
        n_process: Number of worker processes used for directory input.
    """

    def __init__(
            self,
            slice_inp_path,
            slice_opt_root,
            threshold=-34,
            min_length=4000,
            min_interval=300,
            hop_size=10,
            max_sil_kept=500,
            max=0.9,
            alpha=0.25,
            n_process=4,
    ):
        super().__init__()
        self.slice_inp_path = slice_inp_path
        self.slice_opt_root = slice_opt_root
        self.threshold = threshold
        self.min_length = min_length
        self.min_interval = min_interval
        self.hop_size = hop_size
        self.max_sil_kept = max_sil_kept
        self.max = max
        self.alpha = alpha
        self.n_process = n_process

    def run(self):
        """Launch the slicing workers and wait until they finish or are stopped."""
        import shlex
        import shutil

        inp = my_utils.clean_path(self.slice_inp_path)
        opt_root = my_utils.clean_path(self.slice_opt_root)

        # Validate the input BEFORE touching the output directory, so that a
        # wrong input path does not wipe the results of a previous run.
        if not os.path.exists(inp):
            logger.warning('输入路径不存在')
            return

        if os.path.exists(opt_root):
            shutil.rmtree(opt_root)

        # A single file cannot be split across workers.
        n_parts = 1 if os.path.isfile(inp) else self.n_process

        slice_audio_filepath = PROJECT_ROOT / 'tools' / 'slice_audio.py'

        # Equivalent of the former ``PYTHONPATH=$(pwd)`` shell prefix, passed
        # through the environment so no shell is needed.
        child_env = dict(os.environ, PYTHONPATH=os.getcwd())

        ps_slice = []
        for i_part in range(n_parts):
            # Argument list instead of a shell string: paths containing
            # spaces, quotes or shell metacharacters are passed verbatim.
            cmd = [str(PYTHON_EXECUTABLE), str(slice_audio_filepath), str(inp), str(opt_root)]
            cmd.extend(
                str(value)
                for value in (
                    self.threshold, self.min_length, self.min_interval,
                    self.hop_size, self.max_sil_kept, self.max, self.alpha,
                    i_part, n_parts,
                )
            )
            logger.info(' '.join(shlex.quote(arg) for arg in cmd))
            ps_slice.append(Popen(cmd, env=child_env))

        logger.info("切割执行中......")

        stopped_early = False
        while any(p.poll() is None for p in ps_slice):
            # Early termination requested.
            if self.stopped():
                stopped_early = True
                logger.info("slice任务被提前终止")
                for p in ps_slice:
                    # Only signal workers that are still alive; the PID of a
                    # finished worker may already belong to another process.
                    if p.poll() is not None:
                        continue
                    try:
                        kill_process(p.pid)
                    except Exception:
                        # Best effort: keep killing the remaining workers.
                        traceback.print_exc()
                break
            sleep(1)

        # Log each worker's exit status (None if it has not been reaped yet).
        for p in ps_slice:
            logger.info(p.poll())

        # Do not report completion when the task was aborted.
        if not stopped_early:
            logger.info("切割完成")
if __name__ == '__main__':
    # Manual run of the slicing task with hard-coded sample paths.

    # Input to slice automatically; may be a file or a directory.
    slice_inp_path = '/home/wjr/code/sealai/GPT-SoVITS/train_data/naxida.wav'
    # Root directory that receives the sliced sub-audio files.
    slice_opt_root = 'data/naxida/slicer'

    slicer_options = dict(
        # Volume below this value counts as silence (candidate cut point).
        threshold=-34,
        # Minimum segment length; a too-short first segment keeps being
        # merged with the following ones until it exceeds this value.
        min_length=4000,
        # Shortest interval between cuts.
        min_interval=300,
        # Step of the volume curve: smaller means finer precision and more
        # computation (finer precision does not imply better results).
        hop_size=10,
        # Maximum amount of silence kept after cutting.
        max_sil_kept=500,
        # Maximum value after normalization.
        max=0.9,
        # Proportion of the normalized audio that is mixed in.
        alpha=0.25,
        # Number of processes used for slicing.
        n_process=4,
    )

    task = AudioSliceTask(slice_inp_path, slice_opt_root, **slicer_options)
    task.start()
    task.join()