""" Whisper Audio-to-Text – ZeroGPU edition Runs on 🤗 Spaces with ZeroGPU (A100) accelerator """ import os import tempfile import gradio as gr import whisper import numpy as np from huggingface_hub import hf_hub_download # ------------------------------------------------------------------ # 1. ZeroGPU decorator # ------------------------------------------------------------------ import spaces # pip install huggingface-hub>=0.16 # ------------------------------------------------------------------ # 2. Load model once per GPU worker # ------------------------------------------------------------------ MODEL_ID = "openai/whisper-base" # pick any HF whisper ckpt MODEL = None def _load_model(): global MODEL if MODEL is None: # download weights from HF hub (cached) ckpt = hf_hub_download(repo_id=MODEL_ID, filename="pytorch_model.bin") MODEL = whisper.load_model("base") # still uses same weights return MODEL # ------------------------------------------------------------------ # 3. GPU-decorated transcription # ------------------------------------------------------------------ @spaces.GPU def transcribe(audio): """ audio: filepath (upload) or (sr, data) tuple (mic) returns: transcribed text """ if audio is None: return "⚠️ No audio received." # ---- handle microphone ---- if isinstance(audio, tuple): sr, data = audio data = data.astype(np.float32) if np.abs(data).max() > 1.0: data /= np.abs(data).max() with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: import soundfile as sf sf.write(tmp.name, data, sr) audio_path = tmp.name else: audio_path = audio # ---- run Whisper on GPU ---- try: model = _load_model() result = model.transcribe(audio_path, fp16=True) # fp16 OK on GPU text = result["text"].strip() return text if text else "🤷‍♂️ No speech detected." except Exception as e: return f"❌ Error: {e}" finally: if audio_path != audio and os.path.exists(audio_path): os.unlink(audio_path) # ------------------------------------------------------------------ # 4. Gradio UI (unchanged) # ------------------------------------------------------------------ demo = gr.Interface( fn=transcribe, inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"), outputs=gr.Textbox(label="Transcription", lines=6), title="🎙️ Whisper Audio-to-Text (ZeroGPU)", description="Upload or record audio → instant transcription on A100.", ) if __name__ == "__main__": demo.launch()