# EspeMoe-space / app.py
# (Hugging Face Spaces page header captured with the source:
#  author fdaudens, commit message "Update app.py", commit 0ad12f7, verified)
import os
import gradio as gr
import torch
from transformers import pipeline
# Hugging Face model identifier loaded by the ASR pipeline below.
MODEL_ID = "openai/whisper-small"
def load_asr():
    """Create the speech-recognition pipeline, preferring CUDA when present.

    transformers pipelines take an integer device argument: 0 selects the
    first GPU, -1 selects the CPU.
    """
    has_gpu = torch.cuda.is_available()
    device_index = 0 if has_gpu else -1
    print(f"🎤 Loading transcription pipeline on {'GPU' if has_gpu else 'CPU'}...")
    return pipeline(
        task="automatic-speech-recognition",
        model=MODEL_ID,
        device=device_index,
    )
# Module-level pipeline instance: built once at import time and shared by
# every transcription request handled by the Gradio app.
asr = load_asr()
def transcribe_audio(audio_file_path):
    """Transcribe the audio file at *audio_file_path* with the shared pipeline.

    Always returns a user-facing string — input problems and transcription
    failures are reported as messages rather than raised.
    """
    # Guard clauses for the two input problems detectable up front.
    if not audio_file_path:
        return "Please upload an audio file."
    if not os.path.exists(audio_file_path):
        return f"Error: file not found at {audio_file_path}"

    print(f"→ Transcribing: {audio_file_path}")
    try:
        # chunk_length_s works with Whisper in transformers
        output = asr(audio_file_path, chunk_length_s=30, return_timestamps=True)
        # output is a dict carrying "text" (and possibly "chunks")
        text = output.get("text", "").strip()
        return text if text else "(No text recognized)"
    except Exception as exc:
        return f"Error during transcription: {exc}"
# Gradio UI: a single audio-upload input wired to transcribe_audio, with a
# textbox for the resulting text. type="filepath" makes Gradio pass the
# uploaded file's on-disk path (which transcribe_audio checks with
# os.path.exists) rather than raw audio data.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload audio (MP3/WAV)"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio Transcription Pipeline",
    description="Upload an audio file and get a Whisper-small transcription.",
)
if __name__ == "__main__":
    # Bind to all interfaces so the server is reachable from outside a
    # Docker/Spaces container (NOTE: 7860 is Gradio's default port, which
    # Spaces expects to be exposed).
    iface.launch(server_name="0.0.0.0", server_port=7860)