+px +ux -tc
Browse files
app.py
CHANGED
|
@@ -45,13 +45,16 @@ from faster_whisper.transcribe import BatchedInferencePipeline
|
|
| 45 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 46 |
logging.info(f"Using device: {device}")
|
| 47 |
|
| 48 |
-
def download_audio(url, method_choice):
|
| 49 |
"""
|
| 50 |
-
Downloads audio from a given URL using the specified method.
|
| 51 |
|
| 52 |
Args:
|
| 53 |
url (str): The URL of the audio.
|
| 54 |
method_choice (str): The method to use for downloading audio.
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
Returns:
|
| 57 |
tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
|
|
@@ -60,19 +63,19 @@ def download_audio(url, method_choice):
|
|
| 60 |
logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
|
| 61 |
try:
|
| 62 |
if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
|
| 63 |
-
audio_file = download_youtube_audio(url, method_choice)
|
| 64 |
if not audio_file:
|
| 65 |
error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
|
| 66 |
logging.error(error_msg)
|
| 67 |
return None, False
|
| 68 |
elif parsed_url.scheme == 'rtsp':
|
| 69 |
-
audio_file = download_rtsp_audio(url)
|
| 70 |
if not audio_file:
|
| 71 |
error_msg = f"Failed to download RTSP audio from {url}"
|
| 72 |
logging.error(error_msg)
|
| 73 |
return None, False
|
| 74 |
else:
|
| 75 |
-
audio_file = download_direct_audio(url, method_choice)
|
| 76 |
if not audio_file:
|
| 77 |
error_msg = f"Failed to download audio from {url} using method {method_choice}"
|
| 78 |
logging.error(error_msg)
|
|
@@ -84,13 +87,16 @@ def download_audio(url, method_choice):
|
|
| 84 |
return None, False
|
| 85 |
|
| 86 |
|
| 87 |
-
def download_youtube_audio(url, method_choice):
|
| 88 |
"""
|
| 89 |
Downloads audio from a YouTube URL using the specified method.
|
| 90 |
|
| 91 |
Args:
|
| 92 |
url (str): The YouTube URL.
|
| 93 |
method_choice (str): The method to use for downloading.
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
Returns:
|
| 96 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -102,17 +108,20 @@ def download_youtube_audio(url, method_choice):
|
|
| 102 |
method = methods.get(method_choice, yt_dlp_method)
|
| 103 |
try:
|
| 104 |
logging.info(f"Attempting to download YouTube audio using {method_choice}")
|
| 105 |
-
return method(url)
|
| 106 |
except Exception as e:
|
| 107 |
logging.error(f"Error downloading using {method_choice}: {str(e)}")
|
| 108 |
return None
|
| 109 |
|
| 110 |
-
def yt_dlp_method(url):
|
| 111 |
"""
|
| 112 |
Downloads YouTube audio using yt-dlp and saves it to a temporary file.
|
| 113 |
|
| 114 |
Args:
|
| 115 |
url (str): The YouTube URL.
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
Returns:
|
| 118 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -133,6 +142,8 @@ def yt_dlp_method(url):
|
|
| 133 |
'logger': MyLogger(), # Use a custom logger to capture yt-dlp logs
|
| 134 |
'progress_hooks': [my_hook], # Hook to capture download progress and errors
|
| 135 |
}
|
|
|
|
|
|
|
| 136 |
try:
|
| 137 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 138 |
info = ydl.extract_info(url, download=True)
|
|
@@ -174,12 +185,15 @@ def my_hook(d):
|
|
| 174 |
elif d['status'] == 'error':
|
| 175 |
logging.error(f"Download error: {d['filename']}")
|
| 176 |
|
| 177 |
-
def pytube_method(url):
|
| 178 |
"""
|
| 179 |
Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
|
| 180 |
|
| 181 |
Args:
|
| 182 |
url (str): The YouTube URL.
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
Returns:
|
| 185 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -187,7 +201,13 @@ def pytube_method(url):
|
|
| 187 |
logging.info("Using pytube method")
|
| 188 |
from pytube import YouTube
|
| 189 |
try:
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
audio_stream = yt.streams.filter(only_audio=True).first()
|
| 192 |
if audio_stream is None:
|
| 193 |
error_msg = "No audio streams available with pytube."
|
|
@@ -205,12 +225,13 @@ def pytube_method(url):
|
|
| 205 |
return None
|
| 206 |
|
| 207 |
|
| 208 |
-
def download_rtsp_audio(url):
|
| 209 |
"""
|
| 210 |
Downloads audio from an RTSP URL using FFmpeg.
|
| 211 |
|
| 212 |
Args:
|
| 213 |
url (str): The RTSP URL.
|
|
|
|
| 214 |
|
| 215 |
Returns:
|
| 216 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -218,8 +239,12 @@ def download_rtsp_audio(url):
|
|
| 218 |
logging.info("Using FFmpeg to download RTSP stream")
|
| 219 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 220 |
command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
try:
|
| 222 |
-
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 223 |
logging.info(f"Downloaded RTSP audio to: {output_file}")
|
| 224 |
return output_file
|
| 225 |
except subprocess.CalledProcessError as e:
|
|
@@ -229,13 +254,16 @@ def download_rtsp_audio(url):
|
|
| 229 |
logging.error(f"Error downloading RTSP audio: {str(e)}")
|
| 230 |
return None
|
| 231 |
|
| 232 |
-
def download_direct_audio(url, method_choice):
|
| 233 |
"""
|
| 234 |
Downloads audio from a direct URL using the specified method.
|
| 235 |
|
| 236 |
Args:
|
| 237 |
url (str): The direct URL of the audio file.
|
| 238 |
method_choice (str): The method to use for downloading.
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
Returns:
|
| 241 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -250,7 +278,7 @@ def download_direct_audio(url, method_choice):
|
|
| 250 |
}
|
| 251 |
method = methods.get(method_choice, requests_method)
|
| 252 |
try:
|
| 253 |
-
audio_file = method(url)
|
| 254 |
if not audio_file or not os.path.exists(audio_file):
|
| 255 |
error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
|
| 256 |
logging.error(error_msg)
|
|
@@ -260,18 +288,30 @@ def download_direct_audio(url, method_choice):
|
|
| 260 |
logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
|
| 261 |
return None
|
| 262 |
|
| 263 |
-
def requests_method(url):
|
| 264 |
"""
|
| 265 |
Downloads audio using the requests library.
|
| 266 |
|
| 267 |
Args:
|
| 268 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
Returns:
|
| 271 |
str: Path to the downloaded audio file, or None if failed.
|
| 272 |
"""
|
| 273 |
try:
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
if response.status_code == 200:
|
| 276 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
|
| 277 |
for chunk in response.iter_content(chunk_size=8192):
|
|
@@ -286,12 +326,15 @@ def requests_method(url):
|
|
| 286 |
logging.error(f"Error in requests_method: {str(e)}")
|
| 287 |
return None
|
| 288 |
|
| 289 |
-
def wget_method(url):
|
| 290 |
"""
|
| 291 |
Downloads audio using the wget command-line tool.
|
| 292 |
|
| 293 |
Args:
|
| 294 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
Returns:
|
| 297 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -299,8 +342,12 @@ def wget_method(url):
|
|
| 299 |
logging.info("Using wget method")
|
| 300 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 301 |
command = ['wget', '-O', output_file, url]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
try:
|
| 303 |
-
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 304 |
logging.info(f"Downloaded audio to: {output_file}")
|
| 305 |
return output_file
|
| 306 |
except subprocess.CalledProcessError as e:
|
|
@@ -310,12 +357,15 @@ def wget_method(url):
|
|
| 310 |
logging.error(f"Error in wget_method: {str(e)}")
|
| 311 |
return None
|
| 312 |
|
| 313 |
-
def yt_dlp_direct_method(url):
|
| 314 |
"""
|
| 315 |
Downloads audio using yt-dlp (supports various protocols and sites).
|
| 316 |
|
| 317 |
Args:
|
| 318 |
url (str): The URL of the audio or webpage containing audio.
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
Returns:
|
| 321 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -333,6 +383,8 @@ def yt_dlp_direct_method(url):
|
|
| 333 |
'preferredquality': '192',
|
| 334 |
}],
|
| 335 |
}
|
|
|
|
|
|
|
| 336 |
try:
|
| 337 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 338 |
ydl.download([url])
|
|
@@ -342,12 +394,15 @@ def yt_dlp_direct_method(url):
|
|
| 342 |
logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
|
| 343 |
return None
|
| 344 |
|
| 345 |
-
def ffmpeg_method(url):
|
| 346 |
"""
|
| 347 |
Downloads audio using FFmpeg.
|
| 348 |
|
| 349 |
Args:
|
| 350 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
|
|
|
| 351 |
|
| 352 |
Returns:
|
| 353 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -355,8 +410,12 @@ def ffmpeg_method(url):
|
|
| 355 |
logging.info("Using ffmpeg method")
|
| 356 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 357 |
command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
try:
|
| 359 |
-
subprocess.run(command, check=True, capture_output=True, text=True)
|
| 360 |
logging.info(f"Downloaded and converted audio to: {output_file}")
|
| 361 |
return output_file
|
| 362 |
except subprocess.CalledProcessError as e:
|
|
@@ -366,12 +425,15 @@ def ffmpeg_method(url):
|
|
| 366 |
logging.error(f"Error in ffmpeg_method: {str(e)}")
|
| 367 |
return None
|
| 368 |
|
| 369 |
-
def aria2_method(url):
|
| 370 |
"""
|
| 371 |
Downloads audio using aria2.
|
| 372 |
|
| 373 |
Args:
|
| 374 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
Returns:
|
| 377 |
str: Path to the downloaded audio file, or None if failed.
|
|
@@ -379,6 +441,8 @@ def aria2_method(url):
|
|
| 379 |
logging.info("Using aria2 method")
|
| 380 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 381 |
command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
|
|
|
|
|
|
|
| 382 |
try:
|
| 383 |
subprocess.run(command, check=True, capture_output=True, text=True)
|
| 384 |
logging.info(f"Downloaded audio to: {output_file}")
|
|
@@ -402,8 +466,8 @@ def trim_audio(audio_path, start_time, end_time):
|
|
| 402 |
Returns:
|
| 403 |
str: Path to the trimmed audio file.
|
| 404 |
|
| 405 |
-
|
| 406 |
-
|
| 407 |
"""
|
| 408 |
try:
|
| 409 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
|
@@ -464,12 +528,16 @@ def get_model_options(pipeline_type):
|
|
| 464 |
# Dictionary to store loaded models
|
| 465 |
loaded_models = {}
|
| 466 |
|
| 467 |
-
def transcribe_audio(
|
| 468 |
"""
|
| 469 |
Transcribes audio from a given source using the specified pipeline and model.
|
| 470 |
|
| 471 |
Args:
|
| 472 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
|
| 474 |
model_id (str): The ID of the model to use.
|
| 475 |
dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
|
|
@@ -478,6 +546,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
| 478 |
start_time (float, optional): Start time in seconds for trimming audio.
|
| 479 |
end_time (float, optional): End time in seconds for trimming audio.
|
| 480 |
verbose (bool, optional): Whether to output verbose logging.
|
|
|
|
| 481 |
|
| 482 |
Yields:
|
| 483 |
Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
|
|
@@ -494,29 +563,28 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
| 494 |
if verbose:
|
| 495 |
yield verbose_messages, "", None
|
| 496 |
|
| 497 |
-
# Determine
|
| 498 |
audio_path = None
|
| 499 |
is_temp_file = False
|
| 500 |
|
| 501 |
-
if
|
| 502 |
-
if
|
| 503 |
-
#
|
| 504 |
-
audio_path
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
yield verbose_messages + error_msg, "", None
|
| 509 |
-
return
|
| 510 |
-
elif os.path.exists(input_source):
|
| 511 |
-
# Input source is a local file path
|
| 512 |
-
audio_path = input_source
|
| 513 |
is_temp_file = False
|
| 514 |
-
elif
|
| 515 |
-
#
|
| 516 |
-
audio_path =
|
| 517 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
else:
|
| 519 |
-
error_msg = "No
|
| 520 |
logging.error(error_msg)
|
| 521 |
yield verbose_messages + error_msg, "", None
|
| 522 |
return
|
|
@@ -601,9 +669,15 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
| 601 |
|
| 602 |
for segment in segments:
|
| 603 |
if pipeline_type in ["faster-batched", "faster-sequenced"]:
|
| 604 |
-
|
|
|
|
|
|
|
|
|
|
| 605 |
else:
|
| 606 |
-
|
|
|
|
|
|
|
|
|
|
| 607 |
transcription += transcription_segment
|
| 608 |
if verbose:
|
| 609 |
yield verbose_messages + metrics_output, transcription, None
|
|
@@ -627,39 +701,49 @@ with gr.Blocks() as iface:
|
|
| 627 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
| 628 |
|
| 629 |
with gr.Row():
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
label="Pipeline Type",
|
| 634 |
-
value="faster-batched"
|
| 635 |
-
)
|
| 636 |
-
model_id = gr.Dropdown(
|
| 637 |
-
label="Model",
|
| 638 |
-
choices=get_model_options("faster-batched"),
|
| 639 |
-
value=get_model_options("faster-batched")[0]
|
| 640 |
-
)
|
| 641 |
-
|
| 642 |
-
with gr.Row():
|
| 643 |
-
dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
|
| 644 |
-
batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
|
| 645 |
-
download_method = gr.Dropdown(
|
| 646 |
-
choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
|
| 647 |
-
label="Download Method",
|
| 648 |
-
value="yt-dlp"
|
| 649 |
-
)
|
| 650 |
-
|
| 651 |
with gr.Row():
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
|
| 656 |
transcribe_button = gr.Button("Transcribe")
|
| 657 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
with gr.Row():
|
| 659 |
metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
|
| 660 |
transcription_output = gr.Textbox(label="Transcription", lines=10)
|
| 661 |
transcription_file = gr.File(label="Download Transcription")
|
| 662 |
-
|
| 663 |
def update_model_dropdown(pipeline_type):
|
| 664 |
"""
|
| 665 |
Updates the model dropdown choices based on the selected pipeline type.
|
|
@@ -681,7 +765,7 @@ with gr.Blocks() as iface:
|
|
| 681 |
logging.error(f"Error in update_model_dropdown: {str(e)}")
|
| 682 |
return gr.update(choices=["Error"], value="Error", visible=True)
|
| 683 |
|
| 684 |
-
#
|
| 685 |
pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
|
| 686 |
|
| 687 |
def transcribe_with_progress(*args):
|
|
@@ -690,17 +774,16 @@ with gr.Blocks() as iface:
|
|
| 690 |
|
| 691 |
transcribe_button.click(
|
| 692 |
transcribe_with_progress,
|
| 693 |
-
inputs=[
|
| 694 |
outputs=[metrics_output, transcription_output, transcription_file]
|
| 695 |
)
|
| 696 |
|
| 697 |
gr.Examples(
|
| 698 |
examples=[
|
| 699 |
-
["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None,
|
| 700 |
-
["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/
|
| 701 |
-
["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
|
| 702 |
],
|
| 703 |
-
inputs=[
|
| 704 |
)
|
| 705 |
|
| 706 |
iface.launch(share=False, debug=True)
|
|
|
|
| 45 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 46 |
logging.info(f"Using device: {device}")
|
| 47 |
|
| 48 |
+
def download_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
|
| 49 |
"""
|
| 50 |
+
Downloads audio from a given URL using the specified method and proxy settings.
|
| 51 |
|
| 52 |
Args:
|
| 53 |
url (str): The URL of the audio.
|
| 54 |
method_choice (str): The method to use for downloading audio.
|
| 55 |
+
proxy_url (str): Proxy URL if needed.
|
| 56 |
+
proxy_username (str): Proxy username.
|
| 57 |
+
proxy_password (str): Proxy password.
|
| 58 |
|
| 59 |
Returns:
|
| 60 |
tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
|
|
|
|
| 63 |
logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
|
| 64 |
try:
|
| 65 |
if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
|
| 66 |
+
audio_file = download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
|
| 67 |
if not audio_file:
|
| 68 |
error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
|
| 69 |
logging.error(error_msg)
|
| 70 |
return None, False
|
| 71 |
elif parsed_url.scheme == 'rtsp':
|
| 72 |
+
audio_file = download_rtsp_audio(url, proxy_url)
|
| 73 |
if not audio_file:
|
| 74 |
error_msg = f"Failed to download RTSP audio from {url}"
|
| 75 |
logging.error(error_msg)
|
| 76 |
return None, False
|
| 77 |
else:
|
| 78 |
+
audio_file = download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
|
| 79 |
if not audio_file:
|
| 80 |
error_msg = f"Failed to download audio from {url} using method {method_choice}"
|
| 81 |
logging.error(error_msg)
|
|
|
|
| 87 |
return None, False
|
| 88 |
|
| 89 |
|
| 90 |
+
def download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
|
| 91 |
"""
|
| 92 |
Downloads audio from a YouTube URL using the specified method.
|
| 93 |
|
| 94 |
Args:
|
| 95 |
url (str): The YouTube URL.
|
| 96 |
method_choice (str): The method to use for downloading.
|
| 97 |
+
proxy_url (str): Proxy URL if needed.
|
| 98 |
+
proxy_username (str): Proxy username.
|
| 99 |
+
proxy_password (str): Proxy password.
|
| 100 |
|
| 101 |
Returns:
|
| 102 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 108 |
method = methods.get(method_choice, yt_dlp_method)
|
| 109 |
try:
|
| 110 |
logging.info(f"Attempting to download YouTube audio using {method_choice}")
|
| 111 |
+
return method(url, proxy_url, proxy_username, proxy_password)
|
| 112 |
except Exception as e:
|
| 113 |
logging.error(f"Error downloading using {method_choice}: {str(e)}")
|
| 114 |
return None
|
| 115 |
|
| 116 |
+
def yt_dlp_method(url, proxy_url, proxy_username, proxy_password):
|
| 117 |
"""
|
| 118 |
Downloads YouTube audio using yt-dlp and saves it to a temporary file.
|
| 119 |
|
| 120 |
Args:
|
| 121 |
url (str): The YouTube URL.
|
| 122 |
+
proxy_url (str): Proxy URL if needed.
|
| 123 |
+
proxy_username (str): Proxy username.
|
| 124 |
+
proxy_password (str): Proxy password.
|
| 125 |
|
| 126 |
Returns:
|
| 127 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 142 |
'logger': MyLogger(), # Use a custom logger to capture yt-dlp logs
|
| 143 |
'progress_hooks': [my_hook], # Hook to capture download progress and errors
|
| 144 |
}
|
| 145 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 146 |
+
ydl_opts['proxy'] = proxy_url
|
| 147 |
try:
|
| 148 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 149 |
info = ydl.extract_info(url, download=True)
|
|
|
|
| 185 |
elif d['status'] == 'error':
|
| 186 |
logging.error(f"Download error: {d['filename']}")
|
| 187 |
|
| 188 |
+
def pytube_method(url, proxy_url, proxy_username, proxy_password):
|
| 189 |
"""
|
| 190 |
Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
|
| 191 |
|
| 192 |
Args:
|
| 193 |
url (str): The YouTube URL.
|
| 194 |
+
proxy_url (str): Proxy URL if needed.
|
| 195 |
+
proxy_username (str): Proxy username.
|
| 196 |
+
proxy_password (str): Proxy password.
|
| 197 |
|
| 198 |
Returns:
|
| 199 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 201 |
logging.info("Using pytube method")
|
| 202 |
from pytube import YouTube
|
| 203 |
try:
|
| 204 |
+
proxies = None
|
| 205 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 206 |
+
proxies = {
|
| 207 |
+
"http": proxy_url,
|
| 208 |
+
"https": proxy_url
|
| 209 |
+
}
|
| 210 |
+
yt = YouTube(url, proxies=proxies)
|
| 211 |
audio_stream = yt.streams.filter(only_audio=True).first()
|
| 212 |
if audio_stream is None:
|
| 213 |
error_msg = "No audio streams available with pytube."
|
|
|
|
| 225 |
return None
|
| 226 |
|
| 227 |
|
| 228 |
+
def download_rtsp_audio(url, proxy_url):
|
| 229 |
"""
|
| 230 |
Downloads audio from an RTSP URL using FFmpeg.
|
| 231 |
|
| 232 |
Args:
|
| 233 |
url (str): The RTSP URL.
|
| 234 |
+
proxy_url (str): Proxy URL if needed.
|
| 235 |
|
| 236 |
Returns:
|
| 237 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 239 |
logging.info("Using FFmpeg to download RTSP stream")
|
| 240 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 241 |
command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
|
| 242 |
+
env = os.environ.copy()
|
| 243 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 244 |
+
env['http_proxy'] = proxy_url
|
| 245 |
+
env['https_proxy'] = proxy_url
|
| 246 |
try:
|
| 247 |
+
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
|
| 248 |
logging.info(f"Downloaded RTSP audio to: {output_file}")
|
| 249 |
return output_file
|
| 250 |
except subprocess.CalledProcessError as e:
|
|
|
|
| 254 |
logging.error(f"Error downloading RTSP audio: {str(e)}")
|
| 255 |
return None
|
| 256 |
|
| 257 |
+
def download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
|
| 258 |
"""
|
| 259 |
Downloads audio from a direct URL using the specified method.
|
| 260 |
|
| 261 |
Args:
|
| 262 |
url (str): The direct URL of the audio file.
|
| 263 |
method_choice (str): The method to use for downloading.
|
| 264 |
+
proxy_url (str): Proxy URL if needed.
|
| 265 |
+
proxy_username (str): Proxy username.
|
| 266 |
+
proxy_password (str): Proxy password.
|
| 267 |
|
| 268 |
Returns:
|
| 269 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 278 |
}
|
| 279 |
method = methods.get(method_choice, requests_method)
|
| 280 |
try:
|
| 281 |
+
audio_file = method(url, proxy_url, proxy_username, proxy_password)
|
| 282 |
if not audio_file or not os.path.exists(audio_file):
|
| 283 |
error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
|
| 284 |
logging.error(error_msg)
|
|
|
|
| 288 |
logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
|
| 289 |
return None
|
| 290 |
|
| 291 |
+
def requests_method(url, proxy_url, proxy_username, proxy_password):
|
| 292 |
"""
|
| 293 |
Downloads audio using the requests library.
|
| 294 |
|
| 295 |
Args:
|
| 296 |
url (str): The URL of the audio file.
|
| 297 |
+
proxy_url (str): Proxy URL if needed.
|
| 298 |
+
proxy_username (str): Proxy username.
|
| 299 |
+
proxy_password (str): Proxy password.
|
| 300 |
|
| 301 |
Returns:
|
| 302 |
str: Path to the downloaded audio file, or None if failed.
|
| 303 |
"""
|
| 304 |
try:
|
| 305 |
+
proxies = None
|
| 306 |
+
auth = None
|
| 307 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 308 |
+
proxies = {
|
| 309 |
+
"http": proxy_url,
|
| 310 |
+
"https": proxy_url
|
| 311 |
+
}
|
| 312 |
+
if proxy_username and proxy_password:
|
| 313 |
+
auth = (proxy_username, proxy_password)
|
| 314 |
+
response = requests.get(url, stream=True, proxies=proxies, auth=auth)
|
| 315 |
if response.status_code == 200:
|
| 316 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
|
| 317 |
for chunk in response.iter_content(chunk_size=8192):
|
|
|
|
| 326 |
logging.error(f"Error in requests_method: {str(e)}")
|
| 327 |
return None
|
| 328 |
|
| 329 |
+
def wget_method(url, proxy_url, proxy_username, proxy_password):
|
| 330 |
"""
|
| 331 |
Downloads audio using the wget command-line tool.
|
| 332 |
|
| 333 |
Args:
|
| 334 |
url (str): The URL of the audio file.
|
| 335 |
+
proxy_url (str): Proxy URL if needed.
|
| 336 |
+
proxy_username (str): Proxy username.
|
| 337 |
+
proxy_password (str): Proxy password.
|
| 338 |
|
| 339 |
Returns:
|
| 340 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 342 |
logging.info("Using wget method")
|
| 343 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 344 |
command = ['wget', '-O', output_file, url]
|
| 345 |
+
env = os.environ.copy()
|
| 346 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 347 |
+
env['http_proxy'] = proxy_url
|
| 348 |
+
env['https_proxy'] = proxy_url
|
| 349 |
try:
|
| 350 |
+
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
|
| 351 |
logging.info(f"Downloaded audio to: {output_file}")
|
| 352 |
return output_file
|
| 353 |
except subprocess.CalledProcessError as e:
|
|
|
|
| 357 |
logging.error(f"Error in wget_method: {str(e)}")
|
| 358 |
return None
|
| 359 |
|
| 360 |
+
def yt_dlp_direct_method(url, proxy_url, proxy_username, proxy_password):
|
| 361 |
"""
|
| 362 |
Downloads audio using yt-dlp (supports various protocols and sites).
|
| 363 |
|
| 364 |
Args:
|
| 365 |
url (str): The URL of the audio or webpage containing audio.
|
| 366 |
+
proxy_url (str): Proxy URL if needed.
|
| 367 |
+
proxy_username (str): Proxy username.
|
| 368 |
+
proxy_password (str): Proxy password.
|
| 369 |
|
| 370 |
Returns:
|
| 371 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 383 |
'preferredquality': '192',
|
| 384 |
}],
|
| 385 |
}
|
| 386 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 387 |
+
ydl_opts['proxy'] = proxy_url
|
| 388 |
try:
|
| 389 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 390 |
ydl.download([url])
|
|
|
|
| 394 |
logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
|
| 395 |
return None
|
| 396 |
|
| 397 |
+
def ffmpeg_method(url, proxy_url, proxy_username, proxy_password):
|
| 398 |
"""
|
| 399 |
Downloads audio using FFmpeg.
|
| 400 |
|
| 401 |
Args:
|
| 402 |
url (str): The URL of the audio file.
|
| 403 |
+
proxy_url (str): Proxy URL if needed.
|
| 404 |
+
proxy_username (str): Proxy username.
|
| 405 |
+
proxy_password (str): Proxy password.
|
| 406 |
|
| 407 |
Returns:
|
| 408 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 410 |
logging.info("Using ffmpeg method")
|
| 411 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 412 |
command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
|
| 413 |
+
env = os.environ.copy()
|
| 414 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 415 |
+
env['http_proxy'] = proxy_url
|
| 416 |
+
env['https_proxy'] = proxy_url
|
| 417 |
try:
|
| 418 |
+
subprocess.run(command, check=True, capture_output=True, text=True, env=env)
|
| 419 |
logging.info(f"Downloaded and converted audio to: {output_file}")
|
| 420 |
return output_file
|
| 421 |
except subprocess.CalledProcessError as e:
|
|
|
|
| 425 |
logging.error(f"Error in ffmpeg_method: {str(e)}")
|
| 426 |
return None
|
| 427 |
|
| 428 |
+
def aria2_method(url, proxy_url, proxy_username, proxy_password):
|
| 429 |
"""
|
| 430 |
Downloads audio using aria2.
|
| 431 |
|
| 432 |
Args:
|
| 433 |
url (str): The URL of the audio file.
|
| 434 |
+
proxy_url (str): Proxy URL if needed.
|
| 435 |
+
proxy_username (str): Proxy username.
|
| 436 |
+
proxy_password (str): Proxy password.
|
| 437 |
|
| 438 |
Returns:
|
| 439 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
|
| 441 |
logging.info("Using aria2 method")
|
| 442 |
output_file = tempfile.mktemp(suffix='.mp3')
|
| 443 |
command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
|
| 444 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
| 445 |
+
command.extend(['--all-proxy', proxy_url])
|
| 446 |
try:
|
| 447 |
subprocess.run(command, check=True, capture_output=True, text=True)
|
| 448 |
logging.info(f"Downloaded audio to: {output_file}")
|
|
|
|
| 466 |
Returns:
|
| 467 |
str: Path to the trimmed audio file.
|
| 468 |
|
| 469 |
+
Raises:
|
| 470 |
+
gr.Error: If invalid start or end times are provided.
|
| 471 |
"""
|
| 472 |
try:
|
| 473 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
|
|
|
| 528 |
# Dictionary to store loaded models
|
| 529 |
loaded_models = {}
|
| 530 |
|
| 531 |
+
def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
|
| 532 |
"""
|
| 533 |
Transcribes audio from a given source using the specified pipeline and model.
|
| 534 |
|
| 535 |
Args:
|
| 536 |
+
audio_upload (file): Uploaded audio file.
|
| 537 |
+
audio_url (str): URL of audio.
|
| 538 |
+
proxy_url (str): Proxy URL if needed.
|
| 539 |
+
proxy_username (str): Proxy username.
|
| 540 |
+
proxy_password (str): Proxy password.
|
| 541 |
pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
|
| 542 |
model_id (str): The ID of the model to use.
|
| 543 |
dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
|
|
|
|
| 546 |
start_time (float, optional): Start time in seconds for trimming audio.
|
| 547 |
end_time (float, optional): End time in seconds for trimming audio.
|
| 548 |
verbose (bool, optional): Whether to output verbose logging.
|
| 549 |
+
include_timecodes (bool, optional): Whether to include timecodes in the transcription.
|
| 550 |
|
| 551 |
Yields:
|
| 552 |
Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
|
|
|
|
| 563 |
if verbose:
|
| 564 |
yield verbose_messages, "", None
|
| 565 |
|
| 566 |
+
# Determine the audio source
|
| 567 |
audio_path = None
|
| 568 |
is_temp_file = False
|
| 569 |
|
| 570 |
+
if audio_upload is not None:
|
| 571 |
+
if isinstance(audio_upload, dict) and 'name' in audio_upload:
|
| 572 |
+
# audio_upload is a dict with file info
|
| 573 |
+
audio_path = audio_upload['name']
|
| 574 |
+
is_temp_file = False
|
| 575 |
+
elif isinstance(audio_upload, str) and os.path.exists(audio_upload):
|
| 576 |
+
audio_path = audio_upload
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 577 |
is_temp_file = False
|
| 578 |
+
elif audio_url is not None and len(audio_url.strip()) > 0:
|
| 579 |
+
# audio_url is provided
|
| 580 |
+
audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
|
| 581 |
+
if not audio_path:
|
| 582 |
+
error_msg = f"Error downloading audio from {audio_url} using method {download_method}. Check logs for details."
|
| 583 |
+
logging.error(error_msg)
|
| 584 |
+
yield verbose_messages + error_msg, "", None
|
| 585 |
+
return
|
| 586 |
else:
|
| 587 |
+
error_msg = "No audio source provided. Please upload an audio file or enter a URL."
|
| 588 |
logging.error(error_msg)
|
| 589 |
yield verbose_messages + error_msg, "", None
|
| 590 |
return
|
|
|
|
| 669 |
|
| 670 |
for segment in segments:
|
| 671 |
if pipeline_type in ["faster-batched", "faster-sequenced"]:
|
| 672 |
+
if include_timecodes:
|
| 673 |
+
transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
|
| 674 |
+
else:
|
| 675 |
+
transcription_segment = f"{segment.text}\n"
|
| 676 |
else:
|
| 677 |
+
if include_timecodes:
|
| 678 |
+
transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
|
| 679 |
+
else:
|
| 680 |
+
transcription_segment = f"{segment['text']}\n"
|
| 681 |
transcription += transcription_segment
|
| 682 |
if verbose:
|
| 683 |
yield verbose_messages + metrics_output, transcription, None
|
|
|
|
| 701 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
| 702 |
|
| 703 |
with gr.Row():
|
| 704 |
+
audio_upload = gr.Audio(label="Upload or Record Audio", source="upload")
|
| 705 |
+
audio_url = gr.Textbox(label="Or Enter URL of audio file or YouTube link")
|
| 706 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
with gr.Row():
|
| 708 |
+
proxy_url = gr.Textbox(label="Proxy URL", placeholder="Enter proxy URL if needed", value="", lines=1)
|
| 709 |
+
proxy_username = gr.Textbox(label="Proxy Username", placeholder="Proxy username (optional)", value="", lines=1)
|
| 710 |
+
proxy_password = gr.Textbox(label="Proxy Password", placeholder="Proxy password (optional)", value="", lines=1, type="password")
|
| 711 |
|
| 712 |
transcribe_button = gr.Button("Transcribe")
|
| 713 |
+
|
| 714 |
+
with gr.Accordion("Advanced Options", open=False):
|
| 715 |
+
with gr.Row():
|
| 716 |
+
pipeline_type = gr.Dropdown(
|
| 717 |
+
choices=["faster-batched", "faster-sequenced", "transformers"],
|
| 718 |
+
label="Pipeline Type",
|
| 719 |
+
value="faster-batched"
|
| 720 |
+
)
|
| 721 |
+
model_id = gr.Dropdown(
|
| 722 |
+
label="Model",
|
| 723 |
+
choices=get_model_options("faster-batched"),
|
| 724 |
+
value="cstr/whisper-large-v3-turbo-int8_float32"
|
| 725 |
+
)
|
| 726 |
+
|
| 727 |
+
with gr.Row():
|
| 728 |
+
dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
|
| 729 |
+
batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
|
| 730 |
+
download_method = gr.Dropdown(
|
| 731 |
+
choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
|
| 732 |
+
label="Download Method",
|
| 733 |
+
value="yt-dlp"
|
| 734 |
+
)
|
| 735 |
+
|
| 736 |
+
with gr.Row():
|
| 737 |
+
start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
|
| 738 |
+
end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
|
| 739 |
+
verbose = gr.Checkbox(label="Verbose Output", value=False)
|
| 740 |
+
include_timecodes = gr.Checkbox(label="Include timecodes in transcription", value=False)
|
| 741 |
+
|
| 742 |
with gr.Row():
|
| 743 |
metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
|
| 744 |
transcription_output = gr.Textbox(label="Transcription", lines=10)
|
| 745 |
transcription_file = gr.File(label="Download Transcription")
|
| 746 |
+
|
| 747 |
def update_model_dropdown(pipeline_type):
|
| 748 |
"""
|
| 749 |
Updates the model dropdown choices based on the selected pipeline type.
|
|
|
|
| 765 |
logging.error(f"Error in update_model_dropdown: {str(e)}")
|
| 766 |
return gr.update(choices=["Error"], value="Error", visible=True)
|
| 767 |
|
| 768 |
+
# Event handler for pipeline_type change
|
| 769 |
pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
|
| 770 |
|
| 771 |
def transcribe_with_progress(*args):
|
|
|
|
| 774 |
|
| 775 |
transcribe_button.click(
|
| 776 |
transcribe_with_progress,
|
| 777 |
+
inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
|
| 778 |
outputs=[metrics_output, transcription_output, transcription_file]
|
| 779 |
)
|
| 780 |
|
| 781 |
gr.Examples(
|
| 782 |
examples=[
|
| 783 |
+
[None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
|
| 784 |
+
[None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
|
|
|
|
| 785 |
],
|
| 786 |
+
inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
|
| 787 |
)
|
| 788 |
|
| 789 |
iface.launch(share=False, debug=True)
|