# --- Hugging Face Space page metadata (scrape residue, not Python code) ---
# Spaces: Running | File size: 2,735 Bytes | commit c72ac42
"""
Whisper Audio-to-Text β ZeroGPU edition
Runs on π€ Spaces with ZeroGPU (A100) accelerator
"""
import os
import tempfile
import gradio as gr
import whisper
import numpy as np
from huggingface_hub import hf_hub_download
# ------------------------------------------------------------------
# 1. ZeroGPU decorator
# ------------------------------------------------------------------
import spaces # pip install spaces (ZeroGPU helper package)
# ------------------------------------------------------------------
# 2. Load model once per GPU worker
# ------------------------------------------------------------------
MODEL_ID = "openai/whisper-base"  # HF repo id, kept for reference/display
MODEL = None  # lazily-initialized Whisper model; one instance per GPU worker


def _load_model():
    """Return the cached Whisper model, loading it on first call.

    FIX: the original also called
    ``hf_hub_download(repo_id=MODEL_ID, filename="pytorch_model.bin")``,
    but that transformers-format checkpoint was never passed to
    ``whisper.load_model()`` — which downloads OpenAI's own "base"
    weights itself — so the extra download was dead code and has been
    removed.

    Returns:
        whisper.Whisper: the loaded model (cached in module-global MODEL).
    """
    global MODEL
    if MODEL is None:
        # Weights are fetched once and cached (default: ~/.cache/whisper).
        MODEL = whisper.load_model("base")
    return MODEL
# ------------------------------------------------------------------
# 3. GPU-decorated transcription
# ------------------------------------------------------------------
@spaces.GPU
def transcribe(audio):
    """Transcribe uploaded or recorded audio with Whisper.

    Args:
        audio: filepath string (upload), ``(sample_rate, samples)`` tuple
            (microphone as numpy data), or ``None`` when nothing was sent.

    Returns:
        str: the transcribed text, or a human-readable status/error message
        (errors are returned rather than raised so the UI shows them).
    """
    if audio is None:
        return "⚠️ No audio received."

    # ---- handle microphone input: write samples out as a temp WAV ----
    if isinstance(audio, tuple):
        sr, data = audio
        if np.issubdtype(data.dtype, np.integer):
            # FIX: integer PCM (e.g. int16) must be scaled by the dtype's
            # full range; the old peak-normalization amplified quiet clips.
            data = data.astype(np.float32) / np.iinfo(data.dtype).max
        else:
            data = data.astype(np.float32)
            peak = np.abs(data).max()
            if peak > 1.0:  # clamp out-of-range float input into [-1, 1]
                data /= peak
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            import soundfile as sf  # local import: only needed for mic path
            sf.write(tmp.name, data, sr)
            audio_path = tmp.name
    else:
        audio_path = audio

    # ---- run Whisper on GPU ----
    try:
        import torch  # whisper depends on torch, so this is always present
        model = _load_model()
        # FIX: fp16 only when CUDA is actually available — a hard-coded
        # fp16=True warns and falls back on any CPU execution path.
        result = model.transcribe(audio_path, fp16=torch.cuda.is_available())
        text = result["text"].strip()
        return text if text else "🤷‍♂️ No speech detected."
    except Exception as e:  # surface failures in the UI instead of a 500
        return f"❌ Error: {e}"
    finally:
        # Delete only the temp WAV we created for mic input, never an upload.
        if audio_path != audio and os.path.exists(audio_path):
            os.unlink(audio_path)
# ------------------------------------------------------------------
# 4. Gradio UI (unchanged)
# ------------------------------------------------------------------
demo = gr.Interface(
    fn=transcribe,
    # One Audio input serving both upload and microphone capture.
    # type="filepath" hands transcribe() a path; the tuple branch in
    # transcribe() covers numpy-style mic payloads defensively.
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=6),
    title="🎙️ Whisper Audio-to-Text (ZeroGPU)",  # fix: repaired mojibake
    description="Upload or record audio → instant transcription on A100.",
)

if __name__ == "__main__":
    demo.launch()  # fix: removed stray trailing "|" (scrape artifact, syntax error)