Spaces:

IFMedTechdemo
/

Audio2Text

Running

App Files Files Community

IFMedTechdemo committed on Sep 29

Commit

c72ac42

verified ·

1 Parent(s): d5ca60a

Create app.py

Browse files

Files changed (1) hide show

app.py +80 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+"""
+Whisper Audio-to-Text – ZeroGPU edition
+Runs on 🤗 Spaces with ZeroGPU (A100) accelerator
+"""
+import os
+import tempfile
+import gradio as gr
+import whisper
+import numpy as np
+from huggingface_hub import hf_hub_download
+# ------------------------------------------------------------------
+# 1.  ZeroGPU decorator
+# ------------------------------------------------------------------
+import spaces                       # pip install spaces (provides the @spaces.GPU decorator)
+# ------------------------------------------------------------------
+# 2.  Load model once per GPU worker
+# ------------------------------------------------------------------
+MODEL_ID = "openai/whisper-base"    # pick any HF whisper ckpt
+MODEL = None
+def _load_model():
+    global MODEL
+    if MODEL is None:
+        # download weights from HF hub (cached)
+        ckpt = hf_hub_download(repo_id=MODEL_ID, filename="pytorch_model.bin")
+        MODEL = whisper.load_model("base")   # still uses same weights
+    return MODEL
+# ------------------------------------------------------------------
+# 3.  GPU-decorated transcription
+# ------------------------------------------------------------------
+@spaces.GPU
+def transcribe(audio):
+    """
+    audio: filepath (upload) or (sr, data) tuple (mic)
+    returns: transcribed text
+    """
+    if audio is None:
+        return "⚠️  No audio received."
+    # ---- handle microphone ----
+    if isinstance(audio, tuple):
+        sr, data = audio
+        data = data.astype(np.float32)
+        if np.abs(data).max() > 1.0:
+            data /= np.abs(data).max()
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            import soundfile as sf
+            sf.write(tmp.name, data, sr)
+            audio_path = tmp.name
+    else:
+        audio_path = audio
+    # ---- run Whisper on GPU ----
+    try:
+        model = _load_model()
+        result = model.transcribe(audio_path, fp16=True)   # fp16 OK on GPU
+        text = result["text"].strip()
+        return text if text else "🤷‍♂️ No speech detected."
+    except Exception as e:
+        return f"❌ Error: {e}"
+    finally:
+        if audio_path != audio and os.path.exists(audio_path):
+            os.unlink(audio_path)
+# ------------------------------------------------------------------
+# 4.  Gradio UI (unchanged)
+# ------------------------------------------------------------------
+demo = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
+    outputs=gr.Textbox(label="Transcription", lines=6),
+    title="🎙️ Whisper Audio-to-Text (ZeroGPU)",
+    description="Upload or record audio → instant transcription on A100.",
+)
+if __name__ == "__main__":
+    demo.launch()