Spaces:
Running
Running
| """ | |
| Whisper Audio-to-Text β ZeroGPU edition | |
| Runs on π€ Spaces with ZeroGPU (A100) accelerator | |
| """ | |
# --- standard library ---
import os
import tempfile

# --- third party ---
import gradio as gr
import numpy as np
import whisper
from huggingface_hub import hf_hub_download

# ZeroGPU support: `spaces` provides the @spaces.GPU decorator that
# requests an A100 slice per call on Hugging Face Spaces.
import spaces  # preinstalled on HF Spaces; locally: pip install spaces

# ------------------------------------------------------------------
# Model configuration — loaded lazily, once per GPU worker
# ------------------------------------------------------------------
MODEL_ID = "openai/whisper-base"  # any HF whisper checkpoint id
MODEL = None  # populated on first use by _load_model()
def _load_model():
    """Lazily load the Whisper model, caching it at module level.

    Returns
    -------
    whisper.Whisper
        The loaded model; subsequent calls return the cached instance so
        the weights are only loaded once per GPU worker.
    """
    global MODEL
    if MODEL is None:
        # whisper.load_model() downloads and caches its own checkpoint.
        # The previous hf_hub_download(repo_id=MODEL_ID,
        # filename="pytorch_model.bin") call was dead code: its result was
        # never used, and it fetched a transformers-format checkpoint that
        # openai-whisper cannot load anyway — removed.
        MODEL = whisper.load_model("base")
    return MODEL
# ------------------------------------------------------------------
# 3. GPU-decorated transcription
# ------------------------------------------------------------------
@spaces.GPU  # required on ZeroGPU Spaces — without it no GPU is allocated
def transcribe(audio):
    """Transcribe an uploaded or recorded audio clip with Whisper.

    Parameters
    ----------
    audio : str | tuple[int, np.ndarray] | None
        Filepath (upload) or ``(sample_rate, samples)`` tuple (microphone).

    Returns
    -------
    str
        The transcription, or a human-readable status/error message.
    """
    if audio is None:
        return "⚠️ No audio received."

    # ---- handle microphone input: write the samples to a temp WAV ----
    if isinstance(audio, tuple):
        sr, data = audio
        data = data.astype(np.float32)
        # Normalize integer-style amplitudes into [-1, 1] for soundfile.
        peak = np.abs(data).max()  # hoisted: computed once, used twice
        if peak > 1.0:
            data /= peak
        import soundfile as sf
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            sf.write(tmp.name, data, sr)
            audio_path = tmp.name
    else:
        audio_path = audio

    # ---- run Whisper (on GPU thanks to @spaces.GPU) ----
    try:
        model = _load_model()
        result = model.transcribe(audio_path, fp16=True)  # fp16 OK on GPU
        text = result["text"].strip()
        return text if text else "🤷‍♂️ No speech detected."
    except Exception as e:  # surface the failure to the UI instead of crashing
        return f"❌ Error: {e}"
    finally:
        # Remove the temp WAV only when we created one (microphone path).
        if audio_path != audio and os.path.exists(audio_path):
            os.unlink(audio_path)
# ------------------------------------------------------------------
# 4. Gradio UI
# ------------------------------------------------------------------
# NOTE: type="filepath" means Gradio hands transcribe() a filepath string
# for both upload and microphone sources; the tuple branch in transcribe()
# only matters if this is switched to type="numpy".
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=6),
    title="🎙️ Whisper Audio-to-Text (ZeroGPU)",
    description="Upload or record audio → instant transcription on A100.",
)

if __name__ == "__main__":
    demo.launch()