Spaces:
Running
Running
jhj0517
committed on
Commit
·
2a2f7c6
1
Parent(s):
19e342a
Use constant for gradio none validation values
Browse files
modules/utils/constants.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
from gradio_i18n import Translate, gettext as _
|
| 2 |
|
| 3 |
AUTOMATIC_DETECTION = _("Automatic Detection")
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from gradio_i18n import Translate, gettext as _
|
| 2 |
|
| 3 |
AUTOMATIC_DETECTION = _("Automatic Detection")
|
| 4 |
+
GRADIO_NONE_STR = ""
|
| 5 |
+
GRADIO_NONE_NUMBER_MAX = 9999
|
| 6 |
+
GRADIO_NONE_NUMBER_MIN = 0
|
modules/whisper/base_transcription_pipeline.py
CHANGED
|
@@ -15,7 +15,7 @@ from dataclasses import astuple
|
|
| 15 |
from modules.uvr.music_separator import MusicSeparator
|
| 16 |
from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
|
| 17 |
UVR_MODELS_DIR)
|
| 18 |
-
from modules.utils.constants import
|
| 19 |
from modules.utils.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
|
| 20 |
from modules.utils.youtube_manager import get_ytdata, get_ytaudio
|
| 21 |
from modules.utils.files_manager import get_media_files, format_gradio_files, load_yaml, save_yaml
|
|
@@ -519,19 +519,19 @@ class BaseTranscriptionPipeline(ABC):
|
|
| 519 |
language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
|
| 520 |
params.whisper.lang = language_code_dict[params.lang]
|
| 521 |
|
| 522 |
-
if
|
| 523 |
params.whisper.initial_prompt = None
|
| 524 |
-
if
|
| 525 |
params.whisper.prefix = None
|
| 526 |
-
if
|
| 527 |
params.whisper.hotwords = None
|
| 528 |
-
if params.whisper.max_new_tokens ==
|
| 529 |
params.whisper.max_new_tokens = None
|
| 530 |
-
if params.whisper.hallucination_silence_threshold ==
|
| 531 |
params.whisper.hallucination_silence_threshold = None
|
| 532 |
-
if params.whisper.language_detection_threshold ==
|
| 533 |
params.whisper.language_detection_threshold = None
|
| 534 |
-
if params.vad.max_speech_duration_s
|
| 535 |
params.vad.max_speech_duration_s = float('inf')
|
| 536 |
return params
|
| 537 |
|
|
@@ -555,7 +555,7 @@ class BaseTranscriptionPipeline(ABC):
|
|
| 555 |
cached_yaml["whisper"]["lang"] = AUTOMATIC_DETECTION.unwrap()
|
| 556 |
|
| 557 |
if cached_yaml["vad"].get("max_speech_duration_s", float('inf')) == float('inf'):
|
| 558 |
-
cached_yaml["vad"]["max_speech_duration_s"] =
|
| 559 |
|
| 560 |
if cached_yaml is not None and cached_yaml:
|
| 561 |
save_yaml(cached_yaml, DEFAULT_PARAMETERS_CONFIG_PATH)
|
|
|
|
| 15 |
from modules.uvr.music_separator import MusicSeparator
|
| 16 |
from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
|
| 17 |
UVR_MODELS_DIR)
|
| 18 |
+
from modules.utils.constants import *
|
| 19 |
from modules.utils.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
|
| 20 |
from modules.utils.youtube_manager import get_ytdata, get_ytaudio
|
| 21 |
from modules.utils.files_manager import get_media_files, format_gradio_files, load_yaml, save_yaml
|
|
|
|
| 519 |
language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
|
| 520 |
params.whisper.lang = language_code_dict[params.lang]
|
| 521 |
|
| 522 |
+
if params.whisper.initial_prompt == GRADIO_NONE_STR:
|
| 523 |
params.whisper.initial_prompt = None
|
| 524 |
+
if params.whisper.prefix == GRADIO_NONE_STR:
|
| 525 |
params.whisper.prefix = None
|
| 526 |
+
if params.whisper.hotwords == GRADIO_NONE_STR:
|
| 527 |
params.whisper.hotwords = None
|
| 528 |
+
if params.whisper.max_new_tokens == GRADIO_NONE_NUMBER_MIN:
|
| 529 |
params.whisper.max_new_tokens = None
|
| 530 |
+
if params.whisper.hallucination_silence_threshold == GRADIO_NONE_NUMBER_MIN:
|
| 531 |
params.whisper.hallucination_silence_threshold = None
|
| 532 |
+
if params.whisper.language_detection_threshold == GRADIO_NONE_NUMBER_MIN:
|
| 533 |
params.whisper.language_detection_threshold = None
|
| 534 |
+
if params.vad.max_speech_duration_s == GRADIO_NONE_NUMBER_MAX:
|
| 535 |
params.vad.max_speech_duration_s = float('inf')
|
| 536 |
return params
|
| 537 |
|
|
|
|
| 555 |
cached_yaml["whisper"]["lang"] = AUTOMATIC_DETECTION.unwrap()
|
| 556 |
|
| 557 |
if cached_yaml["vad"].get("max_speech_duration_s", float('inf')) == float('inf'):
|
| 558 |
+
cached_yaml["vad"]["max_speech_duration_s"] = GRADIO_NONE_NUMBER_MAX
|
| 559 |
|
| 560 |
if cached_yaml is not None and cached_yaml:
|
| 561 |
save_yaml(cached_yaml, DEFAULT_PARAMETERS_CONFIG_PATH)
|
modules/whisper/data_classes.py
CHANGED
|
@@ -7,7 +7,7 @@ from enum import Enum
|
|
| 7 |
from copy import deepcopy
|
| 8 |
import yaml
|
| 9 |
|
| 10 |
-
from modules.utils.constants import
|
| 11 |
|
| 12 |
|
| 13 |
class WhisperImpl(Enum):
|
|
@@ -82,7 +82,7 @@ class VadParams(BaseParams):
|
|
| 82 |
),
|
| 83 |
gr.Number(
|
| 84 |
label="Maximum Speech Duration (s)",
|
| 85 |
-
value=defaults.get("max_speech_duration_s",
|
| 86 |
info="Maximum duration of speech chunks in \"seconds\"."
|
| 87 |
),
|
| 88 |
gr.Number(
|
|
@@ -373,7 +373,7 @@ class WhisperParams(BaseParams):
|
|
| 373 |
),
|
| 374 |
gr.Textbox(
|
| 375 |
label="Initial Prompt",
|
| 376 |
-
value=defaults.get("initial_prompt",
|
| 377 |
info="Initial prompt for first window"
|
| 378 |
),
|
| 379 |
gr.Slider(
|
|
@@ -411,7 +411,7 @@ class WhisperParams(BaseParams):
|
|
| 411 |
),
|
| 412 |
gr.Textbox(
|
| 413 |
label="Prefix",
|
| 414 |
-
value=defaults.get("prefix",
|
| 415 |
info="Prefix text for first window"
|
| 416 |
),
|
| 417 |
gr.Checkbox(
|
|
@@ -446,7 +446,7 @@ class WhisperParams(BaseParams):
|
|
| 446 |
),
|
| 447 |
gr.Number(
|
| 448 |
label="Max New Tokens",
|
| 449 |
-
value=defaults.get("max_new_tokens",
|
| 450 |
precision=0,
|
| 451 |
info="Maximum number of new tokens per chunk"
|
| 452 |
),
|
|
@@ -459,7 +459,7 @@ class WhisperParams(BaseParams):
|
|
| 459 |
gr.Number(
|
| 460 |
label="Hallucination Silence Threshold (sec)",
|
| 461 |
value=defaults.get("hallucination_silence_threshold",
|
| 462 |
-
|
| 463 |
info="Threshold for skipping silent periods in hallucination detection"
|
| 464 |
),
|
| 465 |
gr.Textbox(
|
|
@@ -470,7 +470,7 @@ class WhisperParams(BaseParams):
|
|
| 470 |
gr.Number(
|
| 471 |
label="Language Detection Threshold",
|
| 472 |
value=defaults.get("language_detection_threshold",
|
| 473 |
-
|
| 474 |
info="Threshold for language detection probability"
|
| 475 |
),
|
| 476 |
gr.Number(
|
|
|
|
| 7 |
from copy import deepcopy
|
| 8 |
import yaml
|
| 9 |
|
| 10 |
+
from modules.utils.constants import *
|
| 11 |
|
| 12 |
|
| 13 |
class WhisperImpl(Enum):
|
|
|
|
| 82 |
),
|
| 83 |
gr.Number(
|
| 84 |
label="Maximum Speech Duration (s)",
|
| 85 |
+
value=defaults.get("max_speech_duration_s", GRADIO_NONE_NUMBER_MAX),
|
| 86 |
info="Maximum duration of speech chunks in \"seconds\"."
|
| 87 |
),
|
| 88 |
gr.Number(
|
|
|
|
| 373 |
),
|
| 374 |
gr.Textbox(
|
| 375 |
label="Initial Prompt",
|
| 376 |
+
value=defaults.get("initial_prompt", GRADIO_NONE_STR),
|
| 377 |
info="Initial prompt for first window"
|
| 378 |
),
|
| 379 |
gr.Slider(
|
|
|
|
| 411 |
),
|
| 412 |
gr.Textbox(
|
| 413 |
label="Prefix",
|
| 414 |
+
value=defaults.get("prefix", GRADIO_NONE_STR),
|
| 415 |
info="Prefix text for first window"
|
| 416 |
),
|
| 417 |
gr.Checkbox(
|
|
|
|
| 446 |
),
|
| 447 |
gr.Number(
|
| 448 |
label="Max New Tokens",
|
| 449 |
+
value=defaults.get("max_new_tokens", GRADIO_NONE_NUMBER_MIN),
|
| 450 |
precision=0,
|
| 451 |
info="Maximum number of new tokens per chunk"
|
| 452 |
),
|
|
|
|
| 459 |
gr.Number(
|
| 460 |
label="Hallucination Silence Threshold (sec)",
|
| 461 |
value=defaults.get("hallucination_silence_threshold",
|
| 462 |
+
GRADIO_NONE_NUMBER_MIN),
|
| 463 |
info="Threshold for skipping silent periods in hallucination detection"
|
| 464 |
),
|
| 465 |
gr.Textbox(
|
|
|
|
| 470 |
gr.Number(
|
| 471 |
label="Language Detection Threshold",
|
| 472 |
value=defaults.get("language_detection_threshold",
|
| 473 |
+
GRADIO_NONE_NUMBER_MIN),
|
| 474 |
info="Threshold for language detection probability"
|
| 475 |
),
|
| 476 |
gr.Number(
|