Spaces:
Running
Running
| from modules.whisper.whisper_factory import WhisperFactory | |
| from modules.whisper.data_classes import * | |
| from modules.utils.subtitle_manager import read_file | |
| from modules.utils.paths import WEBUI_DIR | |
| from test_config import * | |
| import requests | |
| import pytest | |
| import gradio as gr | |
| import os | |
def test_transcribe(
    whisper_type: str,
    vad_filter: bool,
    bgm_separation: bool,
    diarization: bool,
):
    """Exercise the file, YouTube and microphone transcription entry points.

    The test transcribes ``TEST_FILE_PATH`` through ``transcribe_file`` and
    ``transcribe_mic`` and checks that the word error rate against the
    expected answer stays below 0.1. The YouTube path is only exercised
    when ``is_pytube_detected_bot()`` returns a falsy value.

    Args:
        whisper_type: Forwarded to ``WhisperFactory.create_whisper_inference``.
        vad_filter: Forwarded as ``VadParams(vad_filter=...)``.
        bgm_separation: Forwarded as ``BGMSeparationParams(is_separate_bgm=...)``.
        diarization: Forwarded as ``DiarizationParams(is_diarize=...)``; when
            true the expected text is prefixed with ``"SPEAKER_00|"``.
    """
    max_wer = 0.1
    audio_path = TEST_FILE_PATH
    # With diarization enabled the expected transcript carries a speaker prefix.
    answer = "SPEAKER_00|" + TEST_ANSWER if diarization else TEST_ANSWER

    whisper_inferencer = WhisperFactory.create_whisper_inference(
        whisper_type=whisper_type,
    )
    print(
        f"""Whisper Device : {whisper_inferencer.device}\n"""
        f"""BGM Separation Device: {whisper_inferencer.music_separator.device}\n"""
        f"""Diarization Device: {whisper_inferencer.diarizer.device}"""
    )

    # The transcribe_* methods take the pipeline parameters as a flat
    # positional list, hence ``to_list()`` here and ``*hparams`` below.
    hparams = TranscriptionPipelineParams(
        whisper=WhisperParams(
            model_size=TEST_WHISPER_MODEL,
            compute_type=whisper_inferencer.current_compute_type,
        ),
        vad=VadParams(
            vad_filter=vad_filter,
        ),
        bgm_separation=BGMSeparationParams(
            is_separate_bgm=bgm_separation,
            enable_offload=True,
        ),
        diarization=DiarizationParams(
            is_diarize=diarization,
        ),
    ).to_list()

    # --- File input -------------------------------------------------------
    # NOTE(review): the meaning of the three ``None`` arguments and the
    # ``False`` flag is defined by ``transcribe_file``'s signature, which is
    # not visible from this file -- confirm against the implementation.
    _, file_paths = whisper_inferencer.transcribe_file(
        [audio_path],
        None,
        None,
        None,
        "SRT",
        False,
        gr.Progress(),
        *hparams,
    )
    assert file_paths, "transcribe_file returned no output files"
    wer = calculate_wer(answer, _first_cue_text(file_paths[0]))
    assert wer < max_wer, f"WER is too high, it's {wer}"

    # --- YouTube input ----------------------------------------------------
    # Skipped when the helper reports that pytube is being treated as a bot.
    if not is_pytube_detected_bot():
        subtitle_str, file_path = whisper_inferencer.transcribe_youtube(
            TEST_YOUTUBE_URL,
            "SRT",
            False,
            gr.Progress(),
            *hparams,
        )
        assert isinstance(subtitle_str, str) and subtitle_str
        assert os.path.exists(file_path)

    # --- Microphone input -------------------------------------------------
    _, file_path = whisper_inferencer.transcribe_mic(
        audio_path,
        "SRT",
        False,
        gr.Progress(),
        *hparams,
    )
    wer = calculate_wer(answer, _first_cue_text(file_path))
    assert wer < max_wer, f"WER is too high, it's {wer}"


# Defined after the test on purpose: a decorator placed directly above
# ``test_transcribe`` keeps decorating the test rather than this helper.
def _first_cue_text(srt_path) -> str:
    """Return the third line of a subtitle file with commas and periods removed.

    In an SRT file the first cue is laid out as index line, timing line and
    text line, so the third line is assumed to hold the first cue's text.
    """
    lines = read_file(srt_path).split("\n")
    # Fail with a readable message instead of an IndexError on short output.
    assert len(lines) > 2, f"Subtitle file has no cue text: {srt_path}"
    return lines[2].strip().replace(",", "").replace(".", "")