IFMedTechdemo committed on
Commit
c72ac42
·
verified ·
1 Parent(s): d5ca60a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Whisper Audio-to-Text – ZeroGPU edition
3
+ Runs on 🤗 Spaces with ZeroGPU (A100) accelerator
4
+ """
5
+ import os
6
+ import tempfile
7
+ import gradio as gr
8
+ import whisper
9
+ import numpy as np
10
+ from huggingface_hub import hf_hub_download
11
+
12
+ # ------------------------------------------------------------------
13
+ # 1. ZeroGPU decorator
14
+ # ------------------------------------------------------------------
15
+ import spaces # pip install spaces (ZeroGPU helper, provided on HF Spaces)
16
# ------------------------------------------------------------------
# 2. Load model once per GPU worker
# ------------------------------------------------------------------
MODEL_ID = "openai/whisper-base"  # pick any HF whisper ckpt
MODEL = None  # lazily-initialised singleton; one per GPU worker process


def _load_model():
    """Return the cached Whisper model, loading it on first use.

    NOTE(fix): the previous version also called
    ``hf_hub_download(repo_id=MODEL_ID, filename="pytorch_model.bin")``
    and then ignored the result — ``whisper.load_model`` downloads and
    caches its own checkpoint, so that call was dead code and a wasted
    network round-trip. It has been removed.

    Returns:
        The loaded ``whisper`` model instance (same object on every call).
    """
    global MODEL
    if MODEL is None:
        # whisper.load_model caches its weights under ~/.cache/whisper,
        # so this only hits the network on the very first call.
        MODEL = whisper.load_model("base")
    return MODEL
30
# ------------------------------------------------------------------
# 3. GPU-decorated transcription
# ------------------------------------------------------------------
@spaces.GPU
def transcribe(audio):
    """Transcribe uploaded or recorded audio with Whisper.

    Args:
        audio: Filepath string (upload) or ``(sample_rate, samples)``
            tuple (microphone), or ``None`` when nothing was submitted.
            (With ``type="filepath"`` in the UI, Gradio normally passes
            a path; the tuple branch keeps raw-mic callers working.)

    Returns:
        The transcribed text, or a user-facing status/error message.
    """
    if audio is None:
        return "⚠️ No audio received."

    audio_path = audio  # default: Gradio handed us a filepath

    # ---- handle microphone (raw (sr, data) tuple) ----
    if isinstance(audio, tuple):
        sr, data = audio
        if np.issubdtype(data.dtype, np.integer):
            # FIX: integer PCM must be scaled by its dtype range
            # (e.g. 32767 for int16), not by the recording's peak —
            # peak-normalising quiet clips distorts their loudness.
            data = data.astype(np.float32) / np.iinfo(data.dtype).max
        else:
            data = data.astype(np.float32)
            peak = np.abs(data).max()
            if peak > 1.0:  # out-of-range float audio: clamp via peak
                data /= peak
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            import soundfile as sf
            sf.write(tmp.name, data, sr)
            audio_path = tmp.name

    # ---- run Whisper on GPU ----
    try:
        model = _load_model()
        result = model.transcribe(audio_path, fp16=True)  # fp16 OK on GPU
        text = result["text"].strip()
        return text if text else "🤷‍♂️ No speech detected."
    except Exception as e:
        # Broad catch is deliberate: surface any failure in the UI
        # instead of crashing the Space worker.
        return f"❌ Error: {e}"
    finally:
        # Delete only the temp WAV we created for mic input,
        # never the user's uploaded file.
        if audio_path != audio and os.path.exists(audio_path):
            os.unlink(audio_path)
67
# ------------------------------------------------------------------
# 4. Gradio UI (unchanged)
# ------------------------------------------------------------------
# Components are named up front; the Interface wires them to transcribe().
_audio_in = gr.Audio(sources=["upload", "microphone"], type="filepath")
_text_out = gr.Textbox(label="Transcription", lines=6)

demo = gr.Interface(
    fn=transcribe,
    inputs=_audio_in,
    outputs=_text_out,
    title="🎙️ Whisper Audio-to-Text (ZeroGPU)",
    description="Upload or record audio → instant transcription on A100.",
)

if __name__ == "__main__":
    demo.launch()