nb-whisper-demo

Running on T4

App Files Community

pere committed on Oct 8, 2024

Commit

f4d4476

1 Parent(s): add94c7

update test

Browse files

Files changed (1) hide show

app.py +16 -21

app.py CHANGED Viewed

@@ -65,24 +65,19 @@ def _return_yt_html_embed(yt_url):
     return HTML_str
-def yt_transcribe(yt_url, return_timestamps=False):
-    ydl_opts = {
-        'format': 'bestaudio/best',
-        'outtmpl': 'audio.mp3',
-        'noplaylist': True
-    }
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([yt_url])
-    except Exception as e:
-        return f"Error downloading audio: {str(e)}"
-    if not os.path.exists("audio.mp3"):
-        return "Downloaded audio file not found."
-    text = transcribe("audio.mp3", return_timestamps=return_timestamps)
-    return _return_yt_html_embed(yt_url), text
 demo = gr.Blocks()
@@ -90,8 +85,8 @@ demo = gr.Blocks()
 mf_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
-        gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
-        gr.components.Checkbox(label="Return timestamps"),
     ],
     outputs="text",
     title="NB-Whisper Demo",
@@ -106,20 +101,20 @@ mf_transcribe = gr.Interface(
 yt_transcribe = gr.Interface(
     fn=yt_transcribe,
     inputs=[
-        gr.components.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-        gr.components.Checkbox(label="Return timestamps"),
     ],
-    examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
     outputs=["html", "text"],
     title="Whisper Demo: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
-        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
         " arbitrary length."
     ),
     allow_flagging="never",
 )
 with demo:
     gr.TabbedInterface([
         mf_transcribe,

     return HTML_str
+@spaces.GPU
+def yt_transcribe(yt_url, task):
+    html_embed_str = _return_yt_html_embed(yt_url)
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        filepath = os.path.join(tmpdirname, "audio.mp3")
+        download_yt_audio(yt_url, filepath)
+        inputs = ffmpeg_read(filepath, pipe.feature_extractor.sampling_rate)
+        inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
+    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+    return html_embed_str, text
 demo = gr.Blocks()
 mf_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
+        gr.Audio(sources="microphone", type="filepath"),
+        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
     outputs="text",
     title="NB-Whisper Demo",
 yt_transcribe = gr.Interface(
     fn=yt_transcribe,
     inputs=[
+        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
+        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
     outputs=["html", "text"],
     title="Whisper Demo: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
+        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
         " arbitrary length."
     ),
     allow_flagging="never",
 )
 with demo:
     gr.TabbedInterface([
         mf_transcribe,