Spaces:
Running
Running
jhj0517
committed on
Commit
·
ac480c2
1
Parent(s):
4da9545
Limit Vad to only faster-whisper
Browse files
modules/whisper/faster_whisper_inference.py
CHANGED
|
@@ -71,6 +71,20 @@ class FasterWhisperInference(WhisperBase):
|
|
| 71 |
if not params.hotwords:
|
| 72 |
params.hotwords = None
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
params.suppress_tokens = self.format_suppress_tokens_str(params.suppress_tokens)
|
| 75 |
|
| 76 |
segments, info = self.model.transcribe(
|
|
@@ -100,7 +114,9 @@ class FasterWhisperInference(WhisperBase):
|
|
| 100 |
hotwords=params.hotwords,
|
| 101 |
language_detection_threshold=params.language_detection_threshold,
|
| 102 |
language_detection_segments=params.language_detection_segments,
|
| 103 |
-
prompt_reset_on_temperature=params.prompt_reset_on_temperature
|
|
|
|
|
|
|
| 104 |
)
|
| 105 |
progress(0, desc="Loading audio..")
|
| 106 |
|
|
|
|
| 71 |
if not params.hotwords:
|
| 72 |
params.hotwords = None
|
| 73 |
|
| 74 |
+
vad_options = None
|
| 75 |
+
if params.vad_filter:
|
| 76 |
+
# Explicit value set for float('inf') from gr.Number()
|
| 77 |
+
if params.max_speech_duration_s >= 9999:
|
| 78 |
+
params.max_speech_duration_s = float('inf')
|
| 79 |
+
|
| 80 |
+
vad_options = VadOptions(
|
| 81 |
+
threshold=params.threshold,
|
| 82 |
+
min_speech_duration_ms=params.min_speech_duration_ms,
|
| 83 |
+
max_speech_duration_s=params.max_speech_duration_s,
|
| 84 |
+
min_silence_duration_ms=params.min_silence_duration_ms,
|
| 85 |
+
speech_pad_ms=params.speech_pad_ms
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
params.suppress_tokens = self.format_suppress_tokens_str(params.suppress_tokens)
|
| 89 |
|
| 90 |
segments, info = self.model.transcribe(
|
|
|
|
| 114 |
hotwords=params.hotwords,
|
| 115 |
language_detection_threshold=params.language_detection_threshold,
|
| 116 |
language_detection_segments=params.language_detection_segments,
|
| 117 |
+
prompt_reset_on_temperature=params.prompt_reset_on_temperature,
|
| 118 |
+
vad_filter=params.vad_filter,
|
| 119 |
+
vad_parameters=vad_options
|
| 120 |
)
|
| 121 |
progress(0, desc="Loading audio..")
|
| 122 |
|
modules/whisper/whisper_base.py
CHANGED
|
@@ -85,20 +85,6 @@ class WhisperBase(ABC):
|
|
| 85 |
"""
|
| 86 |
params = WhisperParameters.as_value(*whisper_params)
|
| 87 |
|
| 88 |
-
if params.vad_filter:
|
| 89 |
-
vad_options = VadOptions(
|
| 90 |
-
threshold=params.threshold,
|
| 91 |
-
min_speech_duration_ms=params.min_speech_duration_ms,
|
| 92 |
-
max_speech_duration_s=params.max_speech_duration_s,
|
| 93 |
-
min_silence_duration_ms=params.min_silence_duration_ms,
|
| 94 |
-
speech_pad_ms=params.speech_pad_ms
|
| 95 |
-
)
|
| 96 |
-
audio = self.vad.run(
|
| 97 |
-
audio=audio,
|
| 98 |
-
vad_parameters=vad_options,
|
| 99 |
-
progress=progress
|
| 100 |
-
)
|
| 101 |
-
|
| 102 |
if params.lang == "Automatic Detection":
|
| 103 |
params.lang = None
|
| 104 |
else:
|
|
|
|
| 85 |
"""
|
| 86 |
params = WhisperParameters.as_value(*whisper_params)
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
if params.lang == "Automatic Detection":
|
| 89 |
params.lang = None
|
| 90 |
else:
|