Commit: +verbosity

app.py (changed)
@@ -15,15 +15,13 @@ import yt_dlp
 logging.basicConfig(level=logging.INFO)
 
 # Clone and install faster-whisper from GitHub
-# (we should be able to do this in build.sh in a hf space)
 try:
     subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
     subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
 except subprocess.CalledProcessError as e:
-
+    logging.error(f"Error during faster-whisper installation: {e}")
     sys.exit(1)
 
-# Add the faster-whisper directory to the Python path
 sys.path.append("./faster-whisper")
 
 from faster_whisper import WhisperModel
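The comment removed above noted that this install step could move to build time in a Hugging Face Space. As a rough alternative sketch (hypothetical, not what the commit does), the clone plus editable install can also collapse into a single pip call against the GitHub URL, which assumes network access at startup and would make the sys.path tweak unnecessary:

import subprocess
import sys

# Install faster-whisper straight from GitHub in one step; pip performs
# the clone internally, so no local ./faster-whisper checkout is needed.
try:
    subprocess.run(
        [sys.executable, "-m", "pip", "install",
         "git+https://github.com/SYSTRAN/faster-whisper.git"],
        check=True,
    )
except subprocess.CalledProcessError as e:
    print(f"faster-whisper installation failed: {e}", file=sys.stderr)
    sys.exit(1)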
@@ -159,16 +157,18 @@ def save_transcription(transcription):
 
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32"
+        return ["cstr/whisper-large-v3-turbo-int8_float32"]
     elif pipeline_type == "faster-sequenced":
-        return ["
+        return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
     elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3"
-
-
-
+        return ["openai/whisper-large-v3"]
+    return []
+
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
     try:
+        logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
+        verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
+
         if pipeline_type == "faster-batched":
             model = WhisperModel(model_id, device="auto", compute_type=dtype)
             pipeline = BatchedInferencePipeline(model=model)
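For reference, a minimal standalone sketch of the batched path this hunk configures, following the faster-whisper API as published in the SYSTRAN repo (the model name and audio path are placeholders):

from faster_whisper import WhisperModel, BatchedInferencePipeline

# Load a CTranslate2 Whisper model; compute_type mirrors the dtype option above.
model = WhisperModel("deepdml/faster-whisper-large-v3-turbo-ct2",
                     device="auto", compute_type="int8")

# Wrap the model so several audio segments are decoded per forward pass.
pipeline = BatchedInferencePipeline(model=model)

# transcribe() returns a lazy segment generator plus metadata; iterating
# the generator is what actually runs the decoding.
segments, info = pipeline.transcribe("audio.mp3", batch_size=8)
for seg in segments:
    print(f"[{seg.start:.2f}s -> {seg.end:.2f}s] {seg.text}")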
@@ -198,6 +198,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
 
         if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
             audio_path = download_audio(input_source, download_method)
+            verbose_messages += f"Audio file downloaded: {audio_path}\n"
             if audio_path.startswith("Error"):
                 yield f"Error: {audio_path}", "", None
                 return
@@ -207,6 +208,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
         if start_time is not None or end_time is not None:
             trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
             audio_path = trimmed_audio_path
+            verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
 
         start_time_perf = time.time()
         if pipeline_type in ["faster-batched", "faster-sequenced"]:
@@ -225,7 +227,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
         )
 
         if verbose:
-            yield metrics_output, "", None
+            yield verbose_messages + metrics_output, "", None
 
         transcription = ""
 
@@ -237,12 +239,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
             )
             transcription += transcription_segment
             if verbose:
-                yield metrics_output, transcription, None
+                yield verbose_messages + metrics_output, transcription, None
 
         transcription_file = save_transcription(transcription)
-        yield metrics_output, transcription, transcription_file
+        yield verbose_messages + metrics_output, transcription, transcription_file
 
     except Exception as e:
+        logging.error(f"An error occurred during transcription: {str(e)}")
         yield f"An error occurred: {str(e)}", "", None
 
     finally:
@@ -261,15 +264,6 @@ def update_model_dropdown(pipeline_type):
     model_choices = get_model_options(pipeline_type)
     return gr.Dropdown.update(choices=model_choices, value=model_choices[0])
 
-def get_model_options(pipeline_type):
-    if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32"]
-    elif pipeline_type == "faster-sequenced":
-        return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
-    elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3"]
-    return []
-
 with gr.Blocks() as iface:
     gr.Markdown("# Multi-Pipeline Transcription")
     gr.Markdown("Transcribe audio using multiple pipelines and models.")
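Because transcribe_audio is a generator, each yield is a (status_text, transcription_so_far, file_or_None) triple, and Gradio streams successive yields into the bound output components; that is what makes the new verbose messages appear incrementally in the UI. The same contract can be exercised directly, as in this sketch (the URL and the download_method value are placeholders, not taken from the diff):

# Consume the generator outside Gradio, e.g. from a script.
final_path = None
for status, text, final_path in transcribe_audio(
        "https://example.com/talk.mp3",   # placeholder input URL
        pipeline_type="faster-batched",
        model_id="cstr/whisper-large-v3-turbo-int8_float32",
        dtype="int8",
        batch_size=8,
        download_method="yt-dlp",         # placeholder; the app defines its own choices
        verbose=True,
):
    print(status)  # grows as verbose_messages accumulates

# The last yield carries the saved transcription file path.
print("transcript file:", final_path)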