log capture yield
Browse files
app.py
CHANGED
|
@@ -12,6 +12,15 @@ import torch
|
|
| 12 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
| 13 |
import yt_dlp
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
logging.basicConfig(level=logging.INFO)
|
| 16 |
|
| 17 |
# Clone and install faster-whisper from GitHub
|
|
@@ -319,7 +328,7 @@ with gr.Blocks() as iface:
|
|
| 319 |
with gr.Row():
|
| 320 |
start_time = gr.Number(label="Start Time (seconds)", value=0)
|
| 321 |
end_time = gr.Number(label="End Time (seconds)", value=0)
|
| 322 |
-
verbose = gr.Checkbox(label="Verbose Output", value=
|
| 323 |
|
| 324 |
transcribe_button = gr.Button("Transcribe")
|
| 325 |
|
|
@@ -339,17 +348,42 @@ with gr.Blocks() as iface:
|
|
| 339 |
|
| 340 |
pipeline_type.change(update_model_dropdown, inputs=pipeline_type, outputs=model_id)
|
| 341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
transcribe_button.click(
|
| 343 |
-
|
| 344 |
inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
|
| 345 |
outputs=[metrics_output, transcription_output, transcription_file]
|
| 346 |
)
|
| 347 |
|
| 348 |
gr.Examples(
|
| 349 |
examples=[
|
| 350 |
-
["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", 0, None,
|
| 351 |
["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
|
| 352 |
-
["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180,
|
| 353 |
],
|
| 354 |
inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
|
| 355 |
)
|
|
|
|
| 12 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
| 13 |
import yt_dlp
|
| 14 |
|
| 15 |
+
class LogCapture(io.StringIO):
    """A StringIO buffer that mirrors every write to a callback.

    Used as the stream behind a ``logging.StreamHandler`` so that log
    records can be forwarded (e.g. into a Gradio output box) while still
    being accumulated in the underlying buffer.
    """

    def __init__(self, callback):
        """
        Args:
            callback: A callable invoked with each string written.
        """
        super().__init__()
        self.callback = callback

    def write(self, s):
        """Write *s* to the buffer, forward it to the callback, and
        return the number of characters written.

        Returning the count (instead of the original implicit ``None``)
        honors the ``io.TextIOBase.write`` contract expected by callers
        such as ``print(..., file=...)``.
        """
        n = super().write(s)
        self.callback(s)
        return n
|
| 23 |
+
|
| 24 |
logging.basicConfig(level=logging.INFO)
|
| 25 |
|
| 26 |
# Clone and install faster-whisper from GitHub
|
|
|
|
| 328 |
# Time-range and verbosity controls for the transcription request.
with gr.Row():
    start_time = gr.Number(label="Start Time (seconds)", value=0)
    end_time = gr.Number(label="End Time (seconds)", value=0)
    # Verbose defaults to True so the log capture below has output to stream.
    verbose = gr.Checkbox(label="Verbose Output", value=True)

transcribe_button = gr.Button("Transcribe")
|
| 334 |
|
|
|
|
| 348 |
|
| 349 |
pipeline_type.change(update_model_dropdown, inputs=pipeline_type, outputs=model_id)
|
| 350 |
|
| 351 |
+
def transcribe_with_progress(*args):
    """Run ``transcribe_audio`` while streaming captured log output.

    Generator wired to the Transcribe button: attaches a logging handler
    backed by a :class:`LogCapture`, forces verbose mode, and yields
    ``(metrics/log text, transcription text, file)`` tuples as progress
    arrives.

    Args:
        *args: The click() inputs in order — input_source, pipeline_type,
            model_id, dtype, batch_size, download_method, start_time,
            end_time, verbose.

    Yields:
        Tuples of (log/progress text, transcription text, output file).
    """
    args = list(args)
    verbose_index = 8  # position of `verbose` in the click() inputs list
    args[verbose_index] = True  # force verbose so there is log output to stream

    log_output = ""

    def log_callback(message):
        # Plain accumulator. NOTE: this must NOT contain a `yield` —
        # that would turn it into a generator function whose body never
        # runs when the handler calls it, silently dropping every message.
        nonlocal log_output
        log_output += message

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_capture = LogCapture(log_callback)
    # Keep a reference to the handler itself; removeHandler() must be
    # given the handler, not the LogCapture stream it wraps.
    handler = logging.StreamHandler(log_capture)
    logger.addHandler(handler)

    try:
        for progress in transcribe_audio(*args):
            yield log_output + progress, "", None
    finally:
        # Always detach the handler so repeated clicks don't stack
        # handlers that write into dead buffers.
        logger.removeHandler(handler)

    # TODO(review): placeholder — replace with the real transcription text
    # and the actual output file instead of None.
    final_transcription = "This is the final transcription."
    yield log_output, final_transcription, None
|
| 375 |
+
|
| 376 |
# Wire the button to the streaming wrapper; outputs match the yielded
# (metrics/log, transcription, file) tuples.
transcribe_button.click(
    transcribe_with_progress,
    inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
    outputs=[metrics_output, transcription_output, transcription_file]
)

# Prefilled example rows, one value per input component above.
gr.Examples(
    examples=[
        ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", 0, None, True],
        ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
        ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
    ],
    inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
)
|