Spaces:

soiz1
/

Whisper-WebUI

Running

App Files Files Community

Whisper-WebUI / tests /test_transcription.py

soiz1

Upload 109 files

9aaf513 verified 10 months ago

raw

history blame contribute delete

2.8 kB

	from modules.whisper.whisper_factory import WhisperFactory
	from modules.whisper.data_classes import *
	from modules.utils.subtitle_manager import read_file
	from modules.utils.paths import WEBUI_DIR
	from test_config import *

	import requests
	import pytest
	import gradio as gr
	import os


	def _first_cue_text(srt_path: str) -> str:
	    """Return line index 2 of a subtitle file, stripped, without commas or periods.

	    The file is loaded with ``read_file`` and split on ``"\\n"``.
	    NOTE(review): in an SRT file the third line is presumably the text of the
	    first cue (after the index and timestamp lines) -- confirm against the
	    subtitle writer.
	    """
	    lines = read_file(srt_path).split("\n")
	    # Fail with a readable message instead of a bare IndexError on short output.
	    assert len(lines) > 2, f"Subtitle file {srt_path} has too few lines: {lines!r}"
	    return lines[2].strip().replace(",", "").replace(".", "")


	@pytest.mark.parametrize(
	    "whisper_type,vad_filter,bgm_separation,diarization",
	    [
	        (WhisperImpl.WHISPER.value, False, False, False),
	        (WhisperImpl.FASTER_WHISPER.value, False, False, False),
	        (WhisperImpl.INSANELY_FAST_WHISPER.value, False, False, False),
	    ],
	)
	def test_transcribe(
	    whisper_type: str,
	    vad_filter: bool,
	    bgm_separation: bool,
	    diarization: bool,
	):
	    """Exercise transcribe_file, transcribe_youtube and transcribe_mic for one backend.

	    An inferencer is created with ``WhisperFactory.create_whisper_inference`` and
	    ``TEST_FILE_PATH`` is transcribed through ``transcribe_file`` and
	    ``transcribe_mic``; in both cases the word error rate (``calculate_wer``)
	    against the expected text must be below 0.1. ``transcribe_youtube`` is only
	    called when ``is_pytube_detected_bot()`` is falsy, and is only checked for
	    a non-empty string result and an existing output file.

	    Args:
	        whisper_type: Passed to ``WhisperFactory.create_whisper_inference``.
	        vad_filter: Forwarded as ``VadParams(vad_filter=...)``.
	        bgm_separation: Forwarded as ``BGMSeparationParams(is_separate_bgm=...)``.
	        diarization: Forwarded as ``DiarizationParams(is_diarize=...)``; when
	            true, the expected text is prefixed with a speaker label.
	    """
	    audio_path = TEST_FILE_PATH
	    max_wer = 0.1  # Upper bound (exclusive) on the accepted word error rate.

	    answer = TEST_ANSWER
	    if diarization:
	        # A backslash before "|" is not a valid Python escape sequence (it
	        # raises SyntaxWarning on Python 3.12+), so the separator is a plain "|".
	        # NOTE(review): assumes diarized text looks like "SPEAKER_00|<text>" --
	        # confirm against the diarizer's output format.
	        answer = "SPEAKER_00|" + TEST_ANSWER

	    whisper_inferencer = WhisperFactory.create_whisper_inference(
	        whisper_type=whisper_type,
	    )
	    print(
	        f"Whisper Device : {whisper_inferencer.device}\n"
	        f"BGM Separation Device: {whisper_inferencer.music_separator.device}\n"
	        f"Diarization Device: {whisper_inferencer.diarizer.device}"
	    )

	    # The pipeline parameters are flattened to a list so they can be splatted
	    # positionally into each transcribe_* call below.
	    hparams = TranscriptionPipelineParams(
	        whisper=WhisperParams(
	            model_size=TEST_WHISPER_MODEL,
	            compute_type=whisper_inferencer.current_compute_type,
	        ),
	        vad=VadParams(
	            vad_filter=vad_filter,
	        ),
	        bgm_separation=BGMSeparationParams(
	            is_separate_bgm=bgm_separation,
	            enable_offload=True,
	        ),
	        diarization=DiarizationParams(
	            is_diarize=diarization,
	        ),
	    ).to_list()

	    # --- File transcription ---
	    # NOTE(review): the three None values and the False flag are positional
	    # options of transcribe_file whose meaning is not visible here -- confirm
	    # against its signature.
	    _, file_paths = whisper_inferencer.transcribe_file(
	        [audio_path],
	        None,
	        None,
	        None,
	        "SRT",
	        False,
	        gr.Progress(),
	        *hparams,
	    )
	    wer = calculate_wer(answer, _first_cue_text(file_paths[0]))
	    assert wer < max_wer, f"WER is too high, it's {wer}"

	    # --- YouTube transcription (skipped when pytube is flagged as a bot) ---
	    if not is_pytube_detected_bot():
	        subtitle_str, file_path = whisper_inferencer.transcribe_youtube(
	            TEST_YOUTUBE_URL,
	            "SRT",
	            False,
	            gr.Progress(),
	            *hparams,
	        )
	        assert isinstance(subtitle_str, str) and subtitle_str
	        assert os.path.exists(file_path)

	    # --- transcribe_mic, fed the same audio file ---
	    _, file_path = whisper_inferencer.transcribe_mic(
	        audio_path,
	        "SRT",
	        False,
	        gr.Progress(),
	        *hparams,
	    )
	    wer = calculate_wer(answer, _first_cue_text(file_path))
	    assert wer < max_wer, f"WER is too high, it's {wer}"