# Audio2Text — app.py (Hugging Face Space: IFMedTechdemo, commit c72ac42)
"""
Whisper Audio-to-Text – ZeroGPU edition
Runs on πŸ€— Spaces with ZeroGPU (A100) accelerator
"""
import os
import tempfile
import gradio as gr
import whisper
import numpy as np
from huggingface_hub import hf_hub_download
# ------------------------------------------------------------------
# 1. ZeroGPU decorator
# ------------------------------------------------------------------
import spaces # pip install huggingface-hub>=0.16
# ------------------------------------------------------------------
# 2. Load model once per GPU worker
# ------------------------------------------------------------------
MODEL_ID = "openai/whisper-base" # pick any HF whisper ckpt (informational)
MODEL = None # per-worker cached model; populated lazily by _load_model()
def _load_model():
    """Load the Whisper "base" model once per worker and cache it globally.

    Returns:
        The cached ``whisper`` model instance (loaded on first call).
    """
    global MODEL
    if MODEL is None:
        # NOTE(review): the original also ran
        #   hf_hub_download(repo_id=MODEL_ID, filename="pytorch_model.bin")
        # but whisper.load_model("base") downloads its own checkpoint from
        # OpenAI's CDN and never reads that file — the extra ~290 MB download
        # was dead weight, so it has been removed.
        MODEL = whisper.load_model("base")
    return MODEL
# ------------------------------------------------------------------
# 3. GPU-decorated transcription
# ------------------------------------------------------------------
@spaces.GPU
def transcribe(audio):
    """
    Transcribe uploaded or recorded audio with Whisper.

    audio: filepath (upload) or (sr, data) tuple (mic)
    returns: transcribed text, or a user-facing status/error message
    """
    if audio is None:
        return "⚠️ No audio received."
    # ---- handle microphone ----
    audio_path = audio
    if isinstance(audio, tuple):
        sr, data = audio
        data = np.asarray(data, dtype=np.float32)
        # Gradio delivers stereo recordings as (samples, channels); Whisper
        # expects mono, so average the channels down to one.
        if data.ndim > 1:
            data = data.mean(axis=1)
        # Normalize only when needed; guard against an empty recording,
        # where .max() would raise.
        peak = float(np.abs(data).max()) if data.size else 0.0
        if peak > 1.0:
            data /= peak
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            import soundfile as sf
            sf.write(tmp.name, data, sr)
            audio_path = tmp.name
    # ---- run Whisper on GPU ----
    try:
        model = _load_model()
        # fp16 halves memory/compute on CUDA; on CPU it only triggers a
        # fallback warning, so enable it based on actual device availability.
        import torch
        result = model.transcribe(audio_path, fp16=torch.cuda.is_available())
        text = result["text"].strip()
        return text if text else "πŸ€·β€β™‚οΈ No speech detected."
    except Exception as e:
        # Surface the failure in the UI instead of crashing the worker.
        return f"❌ Error: {e}"
    finally:
        # Delete the temp WAV we created for microphone input (upload paths
        # are owned by Gradio and left alone).
        if audio_path != audio and os.path.exists(audio_path):
            os.unlink(audio_path)
# ------------------------------------------------------------------
# 4. Gradio UI (unchanged)
# ------------------------------------------------------------------
# Assemble the UI: one audio input (file upload or mic) feeding the
# transcription function, one multi-line textbox for the result.
_audio_in = gr.Audio(sources=["upload", "microphone"], type="filepath")
_text_out = gr.Textbox(label="Transcription", lines=6)

demo = gr.Interface(
    fn=transcribe,
    inputs=_audio_in,
    outputs=_text_out,
    title="πŸŽ™οΈ Whisper Audio-to-Text (ZeroGPU)",
    description="Upload or record audio β†’ instant transcription on A100.",
)

if __name__ == "__main__":
    demo.launch()