transcribe_audio

Paused

App Files Files Community

cstr committed on Oct 2, 2024

Commit

d274746

verified ·

1 Parent(s): 0a64646

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -53

app.py CHANGED Viewed

@@ -8,9 +8,12 @@ import requests
 from urllib.parse import urlparse
 # Clone and install faster-whisper from GitHub
-subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
-subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
-#subprocess.run(["pip", "install", "yt-dlp pytube ffmpeg-python"], check=True)
 # Add the faster-whisper directory to the Python path
 sys.path.append("./faster-whisper")
@@ -19,28 +22,30 @@ from faster_whisper import WhisperModel
 from faster_whisper.transcribe import BatchedInferencePipeline
 import yt_dlp
-def download_audio(url):
     parsed_url = urlparse(url)
     if parsed_url.netloc in ['www.youtube.com', 'youtu.be', 'youtube.com']:
-        return download_youtube_audio(url)
     else:
-        return download_direct_audio(url)
-def download_youtube_audio(url):
-    methods = [
-        youtube_dl_method,
-        pytube_method,
-        youtube_dl_alternative_method,
-        ffmpeg_method
-    ]
-    for method in methods:
-        try:
-            return method(url)
-        except Exception as e:
-            print(f"Method {method.__name__} failed: {str(e)}")
-    raise Exception("All download methods failed. Please try a different video or a direct audio URL.")
 def youtube_dl_method(url):
     ydl_opts = {
@@ -66,6 +71,21 @@ def pytube_method(url):
     os.rename(out_file, new_file)
     return new_file
 def youtube_dl_alternative_method(url):
     ydl_opts = {
         'format': 'bestaudio/best',
@@ -79,7 +99,6 @@ def youtube_dl_alternative_method(url):
         'quiet': True,
         'no_check_certificate': True,
         'prefer_insecure': True,
-        'nocheckcertificate': True,
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
@@ -91,16 +110,34 @@ def ffmpeg_method(url):
     subprocess.run(command, check=True, capture_output=True)
     return output_file
-def download_direct_audio(url):
-    response = requests.get(url)
-    if response.status_code == 200:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
-            temp_file.write(response.content)
-            return temp_file.name
     else:
-        raise Exception(f"Failed to download audio from {url}")
-def transcribe_audio(input_source, batch_size):
     try:
         # Initialize the model
         model = WhisperModel("cstr/whisper-large-v3-turbo-int8_float32", device="auto", compute_type="int8")
@@ -109,7 +146,10 @@ def transcribe_audio(input_source, batch_size):
         # Handle input source
         if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
             # It's a URL, download the audio
-            audio_path = download_audio(input_source)
         else:
             # It's a local file path
             audio_path = input_source
@@ -119,28 +159,36 @@ def transcribe_audio(input_source, batch_size):
         segments, info = batched_model.transcribe(audio_path, batch_size=batch_size)
         end_time = time.time()
-        # Generate transcription
-        transcription = ""
-        for segment in segments:
-            transcription += f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
-        # Calculate metrics
         transcription_time = end_time - start_time
         real_time_factor = info.duration / transcription_time
         audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)  # Size in MB
-        # Prepare output
-        output = f"Transcription:\n\n{transcription}\n"
-        output += f"\nLanguage: {info.language}, Probability: {info.language_probability:.2f}\n"
-        output += f"Duration: {info.duration:.2f}s, Duration after VAD: {info.duration_after_vad:.2f}s\n"
-        output += f"Transcription time: {transcription_time:.2f} seconds\n"
-        output += f"Real-time factor: {real_time_factor:.2f}x\n"
-        output += f"Audio file size: {audio_file_size:.2f} MB"
-        return output
     except Exception as e:
-        return f"An error occurred: {str(e)}"
     finally:
         # Clean up downloaded file if it was a URL
@@ -150,22 +198,33 @@ def transcribe_audio(input_source, batch_size):
             except:
                 pass
 # Gradio interface
 iface = gr.Interface(
     fn=transcribe_audio,
     inputs=[
         gr.Textbox(label="Audio Source (Upload, MP3 URL, or YouTube URL)"),
-        gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
     ],
-    outputs=gr.Textbox(label="Transcription and Metrics"),
     title="Faster Whisper Multi-Input Transcription",
-    description="Enter an audio file path, MP3 URL, or YouTube URL to transcribe using Faster Whisper (GitHub version). Adjust the batch size for performance tuning.",
     examples=[
-        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", 16],
-        ["https://example.com/path/to/audio.mp3", 16],
-        ["path/to/local/audio.mp3", 16]
     ],
     cache_examples=False  # Prevents automatic processing of examples
 )
-iface.launch()

 from urllib.parse import urlparse
 # Clone and install faster-whisper from GitHub
+try:
+    # Skip the clone when the checkout already exists: `git clone` refuses a
+    # non-empty target directory, which would abort every restart after the first.
+    if not os.path.isdir("./faster-whisper"):
+        subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
+    # Invoke pip through the running interpreter so the editable install lands
+    # in the same environment that later imports faster_whisper.
+    subprocess.run([sys.executable, "-m", "pip", "install", "-e", "./faster-whisper"], check=True)
+except (subprocess.CalledProcessError, OSError) as e:
+    # OSError also covers a missing `git` executable (FileNotFoundError),
+    # which CalledProcessError alone would let escape as a raw traceback.
+    print(f"Error during faster-whisper installation: {e}")
+    sys.exit(1)
 # Add the faster-whisper directory to the Python path
 sys.path.append("./faster-whisper")
 from faster_whisper.transcribe import BatchedInferencePipeline
 import yt_dlp
+def download_audio(url, method_choice):
+    """Route `url` to the YouTube or the direct-file downloader.
+
+    The host part of `url` is compared against a fixed list of YouTube
+    host names; a match goes to download_youtube_audio, anything else to
+    download_direct_audio. `method_choice` is forwarded unchanged, and the
+    return value is whatever the selected downloader returns.
+    """
     parsed_url = urlparse(url)
     if parsed_url.netloc in ['www.youtube.com', 'youtu.be', 'youtube.com']:
+        return download_youtube_audio(url, method_choice)
     else:
+        return download_direct_audio(url, method_choice)
+# Additional YouTube download methods
+def download_youtube_audio(url, method_choice):
+    """Download audio for a YouTube URL with the backend named by `method_choice`.
+
+    Returns whatever the chosen backend returns on success. Any exception
+    raised by the backend is caught and turned into a string starting with
+    "Error downloading using", so callers must inspect the return value
+    instead of relying on an exception.
+    """
+    methods = {
+        'yt-dlp': youtube_dl_method,
+        'pytube': pytube_method,
+        'youtube-dl': youtube_dl_classic_method,
+        'yt-dlp-alt': youtube_dl_alternative_method,
+        'ffmpeg': ffmpeg_method,
+        'aria2': aria2_method
+    }
+    # Names missing from the table silently fall back to youtube_dl_method.
+    method = methods.get(method_choice, youtube_dl_method)
+    try:
+        return method(url)
+    except Exception as e:
+        # The message reports the requested name, even when the fallback ran.
+        return f"Error downloading using {method_choice}: {str(e)}"
 def youtube_dl_method(url):
     ydl_opts = {
     os.rename(out_file, new_file)
     return new_file
+def youtube_dl_classic_method(url):
+    """Download the best audio stream for `url` and convert it to MP3.
+
+    Returns the file name "<video id>.mp3"; the output template has no
+    directory component, so the path is relative.
+    """
+    # Despite the "classic youtube-dl" label this goes through yt_dlp,
+    # using the option set traditionally passed to youtube-dl.
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        # Post-process the download into a 192 kbps MP3 via FFmpeg.
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
+        'outtmpl': '%(id)s.%(ext)s',
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info = ydl.extract_info(url, download=True)
+        # The extension is fixed to .mp3 to match the post-processor's codec.
+        return f"{info['id']}.mp3"
 def youtube_dl_alternative_method(url):
     ydl_opts = {
         'format': 'bestaudio/best',
         'quiet': True,
         'no_check_certificate': True,
         'prefer_insecure': True,
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
     subprocess.run(command, check=True, capture_output=True)
     return output_file
+def aria2_method(url):
+    """Download `url` with aria2c and return the path of the saved file.
+
+    The file is written as "audio.mp3" inside a freshly created private
+    temporary directory. Raises subprocess.CalledProcessError when aria2c
+    exits with a non-zero status; the temporary directory is removed in
+    that case. On success the caller owns both the file and its directory.
+    """
+    # aria2c always resolves --out relative to --dir, so an absolute --out
+    # path does not land where expected. A private directory from mkdtemp
+    # also avoids the name race of the deprecated tempfile.mktemp.
+    download_dir = tempfile.mkdtemp()
+    output_name = 'audio.mp3'
+    command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--dir', download_dir, '--out', output_name, url]
+    try:
+        subprocess.run(command, check=True, capture_output=True)
+    except Exception:
+        # Best-effort cleanup; rmdir only succeeds when nothing was written.
+        try:
+            os.rmdir(download_dir)
+        except OSError:
+            pass
+        raise
+    return os.path.join(download_dir, output_name)
+def download_direct_audio(url, method_choice):
+    """Download a directly linked audio file and return the local path.
+
+    With `method_choice == 'wget'` the work is delegated to wget_method
+    (whose exceptions propagate). Every other choice uses an HTTP GET via
+    requests and stores the body in a temporary .mp3 file. Failures on the
+    requests path are not raised: a string starting with
+    "Error downloading direct audio:" is returned instead.
+    """
+    if method_choice == 'wget':
+        return wget_method(url)
     else:
+        temp_path = None
+        try:
+            # A timeout keeps a stalled server from hanging the app forever;
+            # streaming avoids holding a large audio file fully in memory.
+            with requests.get(url, stream=True, timeout=30) as response:
+                if response.status_code == 200:
+                    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
+                        temp_path = temp_file.name
+                        for chunk in response.iter_content(chunk_size=65536):
+                            temp_file.write(chunk)
+                    return temp_path
+                else:
+                    raise Exception(f"Failed to download audio from {url}")
+        except Exception as e:
+            # Drop a partially written file so failed downloads do not pile up.
+            if temp_path is not None and os.path.exists(temp_path):
+                os.remove(temp_path)
+            return f"Error downloading direct audio: {str(e)}"
+def wget_method(url):
+    """Download `url` with wget into a temporary .mp3 file and return its path.
+
+    Raises subprocess.CalledProcessError when wget exits with a non-zero
+    status; the temporary file is removed in that case.
+    """
+    # mkstemp creates the file atomically; the deprecated mktemp only picks a
+    # name, which another process could claim first. wget -O overwrites the
+    # empty placeholder, so only the descriptor needs closing here.
+    fd, output_file = tempfile.mkstemp(suffix='.mp3')
+    os.close(fd)
+    command = ['wget', '-O', output_file, url]
+    try:
+        subprocess.run(command, check=True, capture_output=True)
+    except Exception:
+        os.remove(output_file)
+        raise
+    return output_file
+def transcribe_audio(input_source, batch_size, download_method):
     try:
         # Initialize the model
         model = WhisperModel("cstr/whisper-large-v3-turbo-int8_float32", device="auto", compute_type="int8")
         # Handle input source
         if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
             # It's a URL, download the audio
+            audio_path = download_audio(input_source, download_method)
+            if audio_path.startswith("Error"):
+                yield f"Error: {audio_path}", "", None
+                return
         else:
             # It's a local file path
             audio_path = input_source
         segments, info = batched_model.transcribe(audio_path, batch_size=batch_size)
         end_time = time.time()
+        # Show initial metrics as soon as possible
         transcription_time = end_time - start_time
         real_time_factor = info.duration / transcription_time
         audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)  # Size in MB
+        metrics_output = (
+            f"Language: {info.language}, Probability: {info.language_probability:.2f}\n"
+            f"Duration: {info.duration:.2f}s, Duration after VAD: {info.duration_after_vad:.2f}s\n"
+            f"Transcription time: {transcription_time:.2f} seconds\n"
+            f"Real-time factor: {real_time_factor:.2f}x\n"
+            f"Audio file size: {audio_file_size:.2f} MB\n"
+        )
+        yield metrics_output, "", None
+        transcription = ""
+        # Stream transcription output gradually
+        for segment in segments:
+            transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
+            transcription += transcription_segment
+            yield metrics_output, transcription, None
+        # Final output with download option
+        transcription_file = save_transcription(transcription)
+        yield metrics_output, transcription, transcription_file
     except Exception as e:
+        yield f"An error occurred: {str(e)}", "", None
     finally:
         # Clean up downloaded file if it was a URL
             except:
                 pass
+def save_transcription(transcription):
+    """Write `transcription` to a new temporary .txt file and return its path.
+
+    The file is kept after this function returns so it can be offered for
+    download; removing it is left to the caller.
+    """
+    # NamedTemporaryFile creates the file atomically (the deprecated mktemp
+    # only picks a name). UTF-8 is set explicitly because transcripts can
+    # contain text the locale's default encoding cannot represent.
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False, encoding='utf-8') as f:
+        f.write(transcription)
+    return f.name
 # Gradio interface
 iface = gr.Interface(
     fn=transcribe_audio,
     inputs=[
         gr.Textbox(label="Audio Source (Upload, MP3 URL, or YouTube URL)"),
+        gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size"),
+        gr.Dropdown(choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"], label="Download Method", value="yt-dlp")
+    ],
+    # The three outputs line up, in order, with the (metrics, transcription,
+    # file) tuples that transcribe_audio yields.
+    # NOTE(review): `live` is passed to gr.Textbox here - confirm the installed
+    # Gradio version accepts that keyword on Textbox.
+    outputs=[
+        gr.Textbox(label="Transcription Metrics and Verbose Messages", live=True),
+        gr.Textbox(label="Transcription", live=True),
+        gr.File(label="Download Transcription")
     ],
     title="Faster Whisper Multi-Input Transcription",
+    description="Enter an audio file path, MP3 URL, or YouTube URL to transcribe using Faster Whisper (GitHub version). Adjust the batch size and choose a download method.",
     # Each example row supplies (audio source, batch size, download method).
     examples=[
+        ["https://www.youtube.com/watch?v=daQ_hqA6HDo", 16, "yt-dlp"],
+        ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", 16, "ffmpeg"],
+        ["path/to/local/audio.mp3", 16, "yt-dlp"]
     ],
     cache_examples=False  # Prevents automatic processing of examples
 )
+iface.launch()