fix
Browse files
app.py
CHANGED
|
@@ -528,12 +528,12 @@ def get_model_options(pipeline_type):
|
|
| 528 |
# Dictionary to store loaded models
|
| 529 |
loaded_models = {}
|
| 530 |
|
| 531 |
-
def transcribe_audio(
|
| 532 |
"""
|
| 533 |
Transcribes audio from a given source using the specified pipeline and model.
|
| 534 |
|
| 535 |
Args:
|
| 536 |
-
|
| 537 |
audio_url (str): URL of audio.
|
| 538 |
proxy_url (str): Proxy URL if needed.
|
| 539 |
proxy_username (str): Proxy username.
|
|
@@ -567,14 +567,10 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
|
|
| 567 |
audio_path = None
|
| 568 |
is_temp_file = False
|
| 569 |
|
| 570 |
-
if
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
is_temp_file = False
|
| 575 |
-
elif isinstance(audio_upload, str) and os.path.exists(audio_upload):
|
| 576 |
-
audio_path = audio_upload
|
| 577 |
-
is_temp_file = False
|
| 578 |
elif audio_url is not None and len(audio_url.strip()) > 0:
|
| 579 |
# audio_url is provided
|
| 580 |
audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
|
|
@@ -584,7 +580,7 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
|
|
| 584 |
yield verbose_messages + error_msg, "", None
|
| 585 |
return
|
| 586 |
else:
|
| 587 |
-
error_msg = "No audio source provided. Please upload an audio file or enter a URL."
|
| 588 |
logging.error(error_msg)
|
| 589 |
yield verbose_messages + error_msg, "", None
|
| 590 |
return
|
|
@@ -696,6 +692,7 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
|
|
| 696 |
if audio_path and is_temp_file and os.path.exists(audio_path):
|
| 697 |
os.remove(audio_path)
|
| 698 |
|
|
|
|
| 699 |
with gr.Blocks() as iface:
|
| 700 |
gr.Markdown("# Audio Transcription")
|
| 701 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
|
@@ -785,7 +782,7 @@ with gr.Blocks() as iface:
|
|
| 785 |
[None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
|
| 786 |
[None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
|
| 787 |
],
|
| 788 |
-
inputs=[
|
| 789 |
)
|
| 790 |
|
| 791 |
iface.launch(share=False, debug=True)
|
|
|
|
| 528 |
# Dictionary to store loaded models
|
| 529 |
loaded_models = {}
|
| 530 |
|
| 531 |
+
def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
|
| 532 |
"""
|
| 533 |
Transcribes audio from a given source using the specified pipeline and model.
|
| 534 |
|
| 535 |
Args:
|
| 536 |
+
audio_input (str): Path to uploaded audio file or recorded audio.
|
| 537 |
audio_url (str): URL of audio.
|
| 538 |
proxy_url (str): Proxy URL if needed.
|
| 539 |
proxy_username (str): Proxy username.
|
|
|
|
| 567 |
audio_path = None
|
| 568 |
is_temp_file = False
|
| 569 |
|
| 570 |
+
if audio_input is not None and len(audio_input) > 0:
|
| 571 |
+
# audio_input is a filepath to uploaded or recorded audio
|
| 572 |
+
audio_path = audio_input
|
| 573 |
+
is_temp_file = False
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
elif audio_url is not None and len(audio_url.strip()) > 0:
|
| 575 |
# audio_url is provided
|
| 576 |
audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
|
|
|
|
| 580 |
yield verbose_messages + error_msg, "", None
|
| 581 |
return
|
| 582 |
else:
|
| 583 |
+
error_msg = "No audio source provided. Please upload an audio file, record audio, or enter a URL."
|
| 584 |
logging.error(error_msg)
|
| 585 |
yield verbose_messages + error_msg, "", None
|
| 586 |
return
|
|
|
|
| 692 |
if audio_path and is_temp_file and os.path.exists(audio_path):
|
| 693 |
os.remove(audio_path)
|
| 694 |
|
| 695 |
+
|
| 696 |
with gr.Blocks() as iface:
|
| 697 |
gr.Markdown("# Audio Transcription")
|
| 698 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
|
|
|
| 782 |
[None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
|
| 783 |
[None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
|
| 784 |
],
|
| 785 |
+
inputs=[audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
|
| 786 |
)
|
| 787 |
|
| 788 |
iface.launch(share=False, debug=True)
|