# MoYoYo.tts / tasks / task_asr_data.py
# liumaolin
# Add GPT-SoVITS training pipeline with audio slicing, transcription, and model training modules.
# 1b05d02
from subprocess import Popen
from time import sleep
from loguru import logger
from env import PROJECT_ROOT, IS_HALF, PYTHON_EXECUTABLE
from tasks.kill_process import kill_process
from tasks.process import BaseProcess
from tools import my_utils
from tools.asr.config import asr_dict
class AsrTask(BaseProcess):
    """Run one of the registered ASR scripts as a stoppable child process.

    The script is looked up in ``asr_dict`` by model name and launched through
    the shell with the project's Python interpreter. ``run`` polls the child
    once per second so that a stop request (``self.stopped()``) can terminate
    it early.
    """

    def __init__(self,
                 asr_inp_dir,
                 asr_opt_dir="output/asr_opt",
                 asr_model="达摩 ASR (中文)",
                 asr_model_size='large',
                 asr_lang='zh'):
        """Store the parameters that are forwarded to the ASR script.

        Args:
            asr_inp_dir: Input directory, passed to the script as ``-i``.
            asr_opt_dir: Output directory, passed to the script as ``-o``.
            asr_model: Key into ``asr_dict`` selecting which script to run.
            asr_model_size: Model size, passed to the script as ``-s``.
            asr_lang: Language code, passed to the script as ``-l``.
        """
        super().__init__()
        self.asr_inp_dir = asr_inp_dir
        self.asr_opt_dir = asr_opt_dir
        self.asr_model = asr_model
        self.asr_model_size = asr_model_size
        self.asr_lang = asr_lang

    def run(self):
        """Launch the ASR script and wait for it to finish or be stopped.

        Logs distinct messages for the three outcomes: stopped early, exited
        with a non-zero status, and completed successfully.

        Raises:
            KeyError: If ``self.asr_model`` is not a key of ``asr_dict``.
        """
        asr_inp_dir = my_utils.clean_path(self.asr_inp_dir)
        asr_opt_dir = my_utils.clean_path(self.asr_opt_dir)
        asr_filepath = PROJECT_ROOT / 'tools' / 'asr' / f'{asr_dict[self.asr_model]["path"]}'

        # The explicit ``== True`` comparison is kept on purpose: the type of
        # IS_HALF is not visible here, and plain truthiness would treat any
        # non-empty value (e.g. the string "False") as half precision.
        precision = "float16" if IS_HALF == True else "float32"  # noqa: E712

        # The script path is quoted like every other path in the command so a
        # PROJECT_ROOT containing spaces does not split into several arguments.
        cmd = (
            f'PYTHONPATH=. "{PYTHON_EXECUTABLE}" "{asr_filepath}"'
            f' -i "{asr_inp_dir}"'
            f' -o "{asr_opt_dir}"'
            f' -s {self.asr_model_size}'
            f' -l {self.asr_lang}'
            f' -p {precision}'
        )

        logger.info("ASR任务开启......")
        logger.info(cmd)
        p_asr = Popen(cmd, shell=True)

        while p_asr.poll() is None:
            if self.stopped():
                logger.info("asr任务被提前终止")
                kill_process(p_asr.pid)
                # Return instead of break so an aborted job is not also
                # reported as completed below.
                return
            sleep(1)

        # poll() returned a value, so returncode is populated.
        if p_asr.returncode != 0:
            logger.error(f"ASR任务失败, 退出码: {p_asr.returncode}")
            return

        logger.info("ASR任务完成, 查看终端进行下一步")
if __name__ == "__main__":
    # Manual smoke run: each preset is the (model, model size, language)
    # triple handed to AsrTask, keyed by the model's display name.
    asr_config = {
        'Faster Whisper (多语种)': ['Faster Whisper (多语种)', 'tiny', 'en'],
        '达摩 ASR (中文)': ["达摩 ASR (中文)", 'large', 'zh'],
    }
    model_name, model_size, language = asr_config['Faster Whisper (多语种)']

    task = AsrTask(
        asr_inp_dir='data/naxida/slicer',
        asr_opt_dir='data/naxida/asr_opt',
        asr_model=model_name,
        asr_model_size=model_size,
        asr_lang=language,
    )
    task.start()
    # To exercise early termination, sleep for a while here and then call
    # task.stop() before joining.
    task.join()