Merge pull request #380 from linuxlurak/master-mod
Include loading of the default value of `file_format` from the config file; a sketch of the resulting round trip follows the changed-file list below.
- app.py +1 -1
- configs/default_parameters.yaml +1 -0
- modules/whisper/base_transcription_pipeline.py +10 -1
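In short, the PR makes the default subtitle format a round trip through the config: it is read from `configs/default_parameters.yaml` at startup, seeds the UI dropdown, and is cached back after each run. A minimal sketch of the loading side, assuming PyYAML and a `load_yaml` helper like the one the pipeline already imports (the inline path and the `.get` fallback are illustrative, not code from the PR):

```python
import yaml

def load_yaml(path: str) -> dict:
    # Parse the YAML config into a plain dict.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Illustrative path; the repo keeps its defaults in configs/default_parameters.yaml.
params = load_yaml("configs/default_parameters.yaml")
whisper_params = params["whisper"]

# Fall back to "SRT" for configs written before this change added the key.
file_format = whisper_params.get("file_format", "SRT")
```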
app.py

```diff
@@ -53,7 +53,7 @@ class App:
             dd_lang = gr.Dropdown(choices=self.whisper_inf.available_langs + [AUTOMATIC_DETECTION],
                                   value=AUTOMATIC_DETECTION if whisper_params["lang"] == AUTOMATIC_DETECTION.unwrap()
                                   else whisper_params["lang"], label=_("Language"))
-            dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt", "LRC"], value="SRT", label=_("File Format"))
+            dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt", "LRC"], value=whisper_params["file_format"], label=_("File Format"))
             with gr.Row():
                 cb_translate = gr.Checkbox(value=whisper_params["is_translate"], label=_("Translate to English?"),
                                            interactive=True)
```
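For context, the pattern the new line relies on is a `gr.Dropdown` whose initial `value` comes from the loaded config rather than a hardcoded literal. A self-contained sketch, with `whisper_params` standing in for the app's loaded parameters and a plain string replacing the app's `_()` translation call:

```python
import gradio as gr

# Stand-in for the dict loaded from default_parameters.yaml.
whisper_params = {"file_format": "SRT"}

with gr.Blocks() as demo:
    # The dropdown now starts on whatever format was last cached,
    # instead of always defaulting to "SRT".
    dd_file_format = gr.Dropdown(
        choices=["SRT", "WebVTT", "txt", "LRC"],
        value=whisper_params["file_format"],
        label="File Format",
    )

# demo.launch()  # uncomment to serve the UI locally
```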
configs/default_parameters.yaml

```diff
@@ -1,5 +1,6 @@
 whisper:
   model_size: "large-v2"
+  file_format: "SRT"
   lang: "Automatic Detection"
   is_translate: false
   beam_size: 5
```
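One caveat the PR does not address: a hand-edited config could hold a value outside the dropdown's choices. A hedged sketch of a defensive read (entirely illustrative, not part of this change):

```python
VALID_FORMATS = ["SRT", "WebVTT", "txt", "LRC"]

def read_file_format(whisper_params: dict) -> str:
    # Reject unknown values so the dropdown never starts on an
    # option it does not actually contain.
    value = whisper_params.get("file_format", "SRT")
    return value if value in VALID_FORMATS else "SRT"
```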
modules/whisper/base_transcription_pipeline.py

```diff
@@ -71,6 +71,7 @@ class BaseTranscriptionPipeline(ABC):
     def run(self,
             audio: Union[str, BinaryIO, np.ndarray],
             progress: gr.Progress = gr.Progress(),
+            file_format: str = "SRT",
             add_timestamp: bool = True,
             *pipeline_params,
             ) -> Tuple[List[Segment], float]:
@@ -86,6 +87,8 @@ class BaseTranscriptionPipeline(ABC):
             Audio input. This can be file path or binary type.
         progress: gr.Progress
             Indicator to show progress directly in gradio.
+        file_format: str
+            Subtitle file format between ["SRT", "WebVTT", "txt", "lrc"]
         add_timestamp: bool
             Whether to add a timestamp at the end of the filename.
         *pipeline_params: tuple
@@ -168,6 +171,7 @@ class BaseTranscriptionPipeline(ABC):
 
         self.cache_parameters(
             params=params,
+            file_format=file_format,
             add_timestamp=add_timestamp
         )
         return result, elapsed_time
@@ -224,6 +228,7 @@ class BaseTranscriptionPipeline(ABC):
             transcribed_segments, time_for_task = self.run(
                 file,
                 progress,
+                file_format,
                 add_timestamp,
                 *pipeline_params,
             )
@@ -298,6 +303,7 @@ class BaseTranscriptionPipeline(ABC):
             transcribed_segments, time_for_task = self.run(
                 mic_audio,
                 progress,
+                file_format,
                 add_timestamp,
                 *pipeline_params,
             )
@@ -364,6 +370,7 @@ class BaseTranscriptionPipeline(ABC):
             transcribed_segments, time_for_task = self.run(
                 audio,
                 progress,
+                file_format,
                 add_timestamp,
                 *pipeline_params,
             )
@@ -513,7 +520,8 @@ class BaseTranscriptionPipeline(ABC):
     @staticmethod
     def cache_parameters(
             params: TranscriptionPipelineParams,
-            add_timestamp: bool
+            file_format: str = "SRT",
+            add_timestamp: bool = True
     ):
         """Cache parameters to the yaml file"""
         cached_params = load_yaml(DEFAULT_PARAMETERS_CONFIG_PATH)
@@ -521,6 +529,7 @@ class BaseTranscriptionPipeline(ABC):
 
         cached_yaml = {**cached_params, **param_to_cache}
         cached_yaml["whisper"]["add_timestamp"] = add_timestamp
+        cached_yaml["whisper"]["file_format"] = file_format
 
         supress_token = cached_yaml["whisper"].get("suppress_tokens", None)
         if supress_token and isinstance(supress_token, list):
```