Spaces:

jhj0517
/

Whisper-WebUI

Running

jhj0517 committed on Apr 29, 2024

Commit

5206df6

1 Parent(s): ac4bff9

add space

Files changed (1) hide show

modules/whisper_data_class.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from dataclasses import dataclass, fields
 import gradio as gr
 @dataclass
@@ -22,26 +23,35 @@ class WhisperGradioComponents:
     ----------
     model_size: gr.Dropdown
         Whisper model size.
     lang: gr.Dropdown
         Source language of the file to transcribe.
     is_translate: gr.Checkbox
         Boolean value that determines whether to translate to English.
         It's Whisper's feature to translate speech from another language directly into English end-to-end.
     beam_size: gr.Number
         Int value that is used for decoding option.
     log_prob_threshold: gr.Number
         If the average log probability over sampled tokens is below this value, treat as failed.
     no_speech_threshold: gr.Number
         If the no_speech probability is higher than this value AND
         the average log probability over sampled tokens is below `log_prob_threshold`,
         consider the segment as silent.
     compute_type: gr.Dropdown
         compute type for transcription.
         see more info : https://opennmt.net/CTranslate2/quantization.html
     best_of: gr.Number
         Number of candidates when sampling with non-zero temperature.
     patience: gr.Number
         Beam search patience factor.
     condition_on_previous_text: bool
         if True, the previous output of the model is provided as a prompt for the next window;
         disabling may make the text inconsistent across windows, but the model becomes less prone to

 from dataclasses import dataclass, fields
 import gradio as gr
+from typing import Optional
 @dataclass
     ----------
     model_size: gr.Dropdown
         Whisper model size.
     lang: gr.Dropdown
         Source language of the file to transcribe.
     is_translate: gr.Checkbox
         Boolean value that determines whether to translate to English.
         It's Whisper's feature to translate speech from another language directly into English end-to-end.
     beam_size: gr.Number
         Int value that is used for decoding option.
     log_prob_threshold: gr.Number
         If the average log probability over sampled tokens is below this value, treat as failed.
     no_speech_threshold: gr.Number
         If the no_speech probability is higher than this value AND
         the average log probability over sampled tokens is below `log_prob_threshold`,
         consider the segment as silent.
     compute_type: gr.Dropdown
         compute type for transcription.
         see more info : https://opennmt.net/CTranslate2/quantization.html
     best_of: gr.Number
         Number of candidates when sampling with non-zero temperature.
     patience: gr.Number
         Beam search patience factor.
     condition_on_previous_text: bool
         if True, the previous output of the model is provided as a prompt for the next window;
         disabling may make the text inconsistent across windows, but the model becomes less prone to