Spaces:
Running
Running
jhj0517
committed on
Commit
·
3aeef88
1
Parent(s):
3a1a0a3
add `compression_ratio_threshold`
Browse files
- app.py +8 -3
- modules/faster_whisper_inference.py +2 -1
- modules/whisper_Inference.py +2 -1
- modules/whisper_parameter.py +7 -1
app.py
CHANGED
|
@@ -68,6 +68,7 @@ class App:
|
|
| 68 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
| 69 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
| 70 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
|
|
|
| 71 |
with gr.Row():
|
| 72 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 73 |
with gr.Row():
|
|
@@ -87,7 +88,8 @@ class App:
|
|
| 87 |
patience=nb_patience,
|
| 88 |
condition_on_previous_text=cb_condition_on_previous_text,
|
| 89 |
initial_prompt=tb_initial_prompt,
|
| 90 |
-
temperature=sd_temperature
|
|
|
|
| 91 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
| 92 |
inputs=params + whisper_params.to_list(),
|
| 93 |
outputs=[tb_indicator, files_subtitles])
|
|
@@ -124,6 +126,7 @@ class App:
|
|
| 124 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
| 125 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
| 126 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
|
|
|
| 127 |
with gr.Row():
|
| 128 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 129 |
with gr.Row():
|
|
@@ -143,7 +146,8 @@ class App:
|
|
| 143 |
patience=nb_patience,
|
| 144 |
condition_on_previous_text=cb_condition_on_previous_text,
|
| 145 |
initial_prompt=tb_initial_prompt,
|
| 146 |
-
temperature=sd_temperature
|
|
|
|
| 147 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
| 148 |
inputs=params + whisper_params.to_list(),
|
| 149 |
outputs=[tb_indicator, files_subtitles])
|
|
@@ -192,7 +196,8 @@ class App:
|
|
| 192 |
patience=nb_patience,
|
| 193 |
condition_on_previous_text=cb_condition_on_previous_text,
|
| 194 |
initial_prompt=tb_initial_prompt,
|
| 195 |
-
temperature=sd_temperature
|
|
|
|
| 196 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
| 197 |
inputs=params + whisper_params.to_list(),
|
| 198 |
outputs=[tb_indicator, files_subtitles])
|
|
|
|
| 68 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
| 69 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
| 70 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
| 71 |
+
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
|
| 72 |
with gr.Row():
|
| 73 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 74 |
with gr.Row():
|
|
|
|
| 88 |
patience=nb_patience,
|
| 89 |
condition_on_previous_text=cb_condition_on_previous_text,
|
| 90 |
initial_prompt=tb_initial_prompt,
|
| 91 |
+
temperature=sd_temperature,
|
| 92 |
+
compression_ratio_threshold=nb_compression_ratio_threshold)
|
| 93 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
| 94 |
inputs=params + whisper_params.to_list(),
|
| 95 |
outputs=[tb_indicator, files_subtitles])
|
|
|
|
| 126 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
| 127 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
| 128 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
| 129 |
+
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
|
| 130 |
with gr.Row():
|
| 131 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 132 |
with gr.Row():
|
|
|
|
| 146 |
patience=nb_patience,
|
| 147 |
condition_on_previous_text=cb_condition_on_previous_text,
|
| 148 |
initial_prompt=tb_initial_prompt,
|
| 149 |
+
temperature=sd_temperature,
|
| 150 |
+
compression_ratio_threshold=nb_compression_ratio_threshold)
|
| 151 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
| 152 |
inputs=params + whisper_params.to_list(),
|
| 153 |
outputs=[tb_indicator, files_subtitles])
|
|
|
|
| 196 |
patience=nb_patience,
|
| 197 |
condition_on_previous_text=cb_condition_on_previous_text,
|
| 198 |
initial_prompt=tb_initial_prompt,
|
| 199 |
+
temperature=sd_temperature,
|
| 200 |
+
compression_ratio_threshold=nb_compression_ratio_threshold)
|
| 201 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
| 202 |
inputs=params + whisper_params.to_list(),
|
| 203 |
outputs=[tb_indicator, files_subtitles])
|
modules/faster_whisper_inference.py
CHANGED
|
@@ -269,7 +269,8 @@ class FasterWhisperInference(BaseInterface):
|
|
| 269 |
no_speech_threshold=params.no_speech_threshold,
|
| 270 |
best_of=params.best_of,
|
| 271 |
patience=params.patience,
|
| 272 |
-
temperature=params.temperature
|
|
|
|
| 273 |
)
|
| 274 |
progress(0, desc="Loading audio..")
|
| 275 |
|
|
|
|
| 269 |
no_speech_threshold=params.no_speech_threshold,
|
| 270 |
best_of=params.best_of,
|
| 271 |
patience=params.patience,
|
| 272 |
+
temperature=params.temperature,
|
| 273 |
+
compression_ratio_threshold=params.compression_ratio_threshold,
|
| 274 |
)
|
| 275 |
progress(0, desc="Loading audio..")
|
| 276 |
|
modules/whisper_Inference.py
CHANGED
|
@@ -258,7 +258,8 @@ class WhisperInference(BaseInterface):
|
|
| 258 |
best_of=params.best_of,
|
| 259 |
patience=params.patience,
|
| 260 |
temperature=params.temperature,
|
| 261 |
-
|
|
|
|
| 262 |
elapsed_time = time.time() - start_time
|
| 263 |
|
| 264 |
return segments_result, elapsed_time
|
|
|
|
| 258 |
best_of=params.best_of,
|
| 259 |
patience=params.patience,
|
| 260 |
temperature=params.temperature,
|
| 261 |
+
compression_ratio_threshold=params.compression_ratio_threshold,
|
| 262 |
+
progress_callback=progress_callback,)["segments"]
|
| 263 |
elapsed_time = time.time() - start_time
|
| 264 |
|
| 265 |
return segments_result, elapsed_time
|
modules/whisper_parameter.py
CHANGED
|
@@ -17,6 +17,7 @@ class WhisperGradioComponents:
|
|
| 17 |
condition_on_previous_text: gr.Checkbox
|
| 18 |
initial_prompt: gr.Textbox
|
| 19 |
temperature: gr.Slider
|
|
|
|
| 20 |
"""
|
| 21 |
A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
|
| 22 |
See more about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
@@ -64,9 +65,13 @@ class WhisperGradioComponents:
|
|
| 64 |
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
| 65 |
to make it more likely to predict those word correctly.
|
| 66 |
|
| 67 |
-
temperature:
|
|
|
|
| 68 |
which will be successively used upon failures according to either
|
| 69 |
`compression_ratio_threshold` or `log_prob_threshold`.
|
|
|
|
|
|
|
|
|
|
| 70 |
"""
|
| 71 |
|
| 72 |
def to_list(self) -> list:
|
|
@@ -95,6 +100,7 @@ class WhisperValues:
|
|
| 95 |
condition_on_previous_text: bool
|
| 96 |
initial_prompt: Optional[str]
|
| 97 |
temperature: float
|
|
|
|
| 98 |
"""
|
| 99 |
A data class to use Whisper parameters. Use "after" Gradio pre-processing.
|
| 100 |
See more about Gradio pre-processing: : https://www.gradio.app/docs/components
|
|
|
|
| 17 |
condition_on_previous_text: gr.Checkbox
|
| 18 |
initial_prompt: gr.Textbox
|
| 19 |
temperature: gr.Slider
|
| 20 |
+
compression_ratio_threshold: gr.Number
|
| 21 |
"""
|
| 22 |
A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
|
| 23 |
See more about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
|
|
| 65 |
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
| 66 |
to make it more likely to predict those word correctly.
|
| 67 |
|
| 68 |
+
temperature: gr.Slider
|
| 69 |
+
Temperature for sampling. It can be a tuple of temperatures,
|
| 70 |
which will be successively used upon failures according to either
|
| 71 |
`compression_ratio_threshold` or `log_prob_threshold`.
|
| 72 |
+
|
| 73 |
+
compression_ratio_threshold: float
|
| 74 |
+
If the gzip compression ratio is above this value, treat as failed
|
| 75 |
"""
|
| 76 |
|
| 77 |
def to_list(self) -> list:
|
|
|
|
| 100 |
condition_on_previous_text: bool
|
| 101 |
initial_prompt: Optional[str]
|
| 102 |
temperature: float
|
| 103 |
+
compression_ratio_threshold: float
|
| 104 |
"""
|
| 105 |
A data class to use Whisper parameters. Use "after" Gradio pre-processing.
|
| 106 |
See more about Gradio pre-processing: : https://www.gradio.app/docs/components
|