Spaces:
Running
Running
jhj0517
committed on
Commit
·
51f672d
1
Parent(s):
6d61e4e
add some args and fix timestamp bug
Browse files
modules/insanely_fast_whisper_inference.py
CHANGED
|
@@ -9,6 +9,8 @@ import gradio as gr
|
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
import whisper
|
| 11 |
|
|
|
|
|
|
|
| 12 |
from modules.whisper_parameter import *
|
| 13 |
from modules.whisper_base import WhisperBase
|
| 14 |
|
|
@@ -55,14 +57,32 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
| 55 |
|
| 56 |
if params.lang == "Automatic Detection":
|
| 57 |
params.lang = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
progress(0, desc="Transcribing...")
|
| 60 |
-
segments = self.model(
|
| 61 |
-
inputs=audio,
|
| 62 |
-
chunk_length_s=30,
|
| 63 |
-
batch_size=24,
|
| 64 |
-
return_timestamps=True,
|
| 65 |
-
)
|
| 66 |
segments_result = self.format_result(
|
| 67 |
transcribed_result=segments,
|
| 68 |
)
|
|
@@ -98,7 +118,6 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
| 98 |
|
| 99 |
self.current_compute_type = compute_type
|
| 100 |
self.current_model_size = model_size
|
| 101 |
-
|
| 102 |
self.model = pipeline(
|
| 103 |
"automatic-speech-recognition",
|
| 104 |
model=os.path.join(self.model_dir, model_size),
|
|
@@ -118,8 +137,6 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
| 118 |
----------
|
| 119 |
transcribed_result: dict
|
| 120 |
Transcription result of the insanely_fast_whisper
|
| 121 |
-
progress: gr.Progress
|
| 122 |
-
Indicator to show progress directly in gradio.
|
| 123 |
|
| 124 |
Returns
|
| 125 |
----------
|
|
@@ -129,6 +146,8 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
| 129 |
result = transcribed_result["chunks"]
|
| 130 |
for item in result:
|
| 131 |
start, end = item["timestamp"][0], item["timestamp"][1]
|
|
|
|
|
|
|
| 132 |
item["start"] = start
|
| 133 |
item["end"] = end
|
| 134 |
return result
|
|
|
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
import whisper
|
| 11 |
|
| 12 |
+
from rich.progress import Progress, TimeElapsedColumn, BarColumn, TextColumn
|
| 13 |
+
|
| 14 |
from modules.whisper_parameter import *
|
| 15 |
from modules.whisper_base import WhisperBase
|
| 16 |
|
|
|
|
| 57 |
|
| 58 |
if params.lang == "Automatic Detection":
|
| 59 |
params.lang = None
|
| 60 |
+
else:
|
| 61 |
+
language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
|
| 62 |
+
params.lang = language_code_dict[params.lang]
|
| 63 |
+
|
| 64 |
+
progress(0, desc="Transcribing...Progress is not shown in insanely-fast-whisper.")
|
| 65 |
+
with Progress(
|
| 66 |
+
TextColumn("[progress.description]{task.description}"),
|
| 67 |
+
BarColumn(style="yellow1", pulse_style="white"),
|
| 68 |
+
TimeElapsedColumn(),
|
| 69 |
+
) as progress:
|
| 70 |
+
progress.add_task("[yellow]Transcribing...", total=None)
|
| 71 |
+
|
| 72 |
+
segments = self.model(
|
| 73 |
+
inputs=audio,
|
| 74 |
+
return_timestamps=True,
|
| 75 |
+
chunk_length_s=30,
|
| 76 |
+
batch_size=24,
|
| 77 |
+
generate_kwargs={
|
| 78 |
+
"language": params.lang,
|
| 79 |
+
"task": "translate" if params.is_translate and self.current_model_size in self.translatable_models else "transcribe",
|
| 80 |
+
"no_speech_threshold": params.no_speech_threshold,
|
| 81 |
+
"temperature": params.temperature,
|
| 82 |
+
"compression_ratio_threshold": params.compression_ratio_threshold
|
| 83 |
+
}
|
| 84 |
+
)
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
segments_result = self.format_result(
|
| 87 |
transcribed_result=segments,
|
| 88 |
)
|
|
|
|
| 118 |
|
| 119 |
self.current_compute_type = compute_type
|
| 120 |
self.current_model_size = model_size
|
|
|
|
| 121 |
self.model = pipeline(
|
| 122 |
"automatic-speech-recognition",
|
| 123 |
model=os.path.join(self.model_dir, model_size),
|
|
|
|
| 137 |
----------
|
| 138 |
transcribed_result: dict
|
| 139 |
Transcription result of the insanely_fast_whisper
|
|
|
|
|
|
|
| 140 |
|
| 141 |
Returns
|
| 142 |
----------
|
|
|
|
| 146 |
result = transcribed_result["chunks"]
|
| 147 |
for item in result:
|
| 148 |
start, end = item["timestamp"][0], item["timestamp"][1]
|
| 149 |
+
if end is None:
|
| 150 |
+
end = start
|
| 151 |
item["start"] = start
|
| 152 |
item["end"] = end
|
| 153 |
return result
|