# EspeMoe-space / app.py
# (Hugging Face Spaces page header captured with the source:
#  author fdaudens, commit message "Update app.py", commit 0ad12f7, verified)
import os
import gradio as gr
import torch
from transformers import pipeline
# Hugging Face model identifier loaded by the ASR pipeline below.
MODEL_ID = "openai/whisper-small"
def load_asr():
    """Create the speech-recognition pipeline, preferring CUDA when present.

    transformers pipelines take an integer device argument: 0 selects the
    first GPU, -1 selects the CPU.
    """
    has_gpu = torch.cuda.is_available()
    device_index = 0 if has_gpu else -1
    print(f"🎤 Loading transcription pipeline on {'GPU' if has_gpu else 'CPU'}...")
    return pipeline(
        task="automatic-speech-recognition",
        model=MODEL_ID,
        device=device_index,
    )
# Module-level pipeline instance: built once at import time and shared by
# every transcription request handled by the Gradio app.
asr = load_asr()
def transcribe_audio(audio_file_path):
    """Transcribe the audio file at *audio_file_path* with the shared pipeline.

    Always returns a user-facing string — input problems and transcription
    failures are reported as messages rather than raised.
    """
    # Guard clauses for the two input problems detectable up front.
    if not audio_file_path:
        return "Please upload an audio file."
    if not os.path.exists(audio_file_path):
        return f"Error: file not found at {audio_file_path}"

    print(f"→ Transcribing: {audio_file_path}")
    try:
        # chunk_length_s works with Whisper in transformers
        output = asr(audio_file_path, chunk_length_s=30, return_timestamps=True)
        # output is a dict carrying "text" (and possibly "chunks")
        text = output.get("text", "").strip()
        return text if text else "(No text recognized)"
    except Exception as exc:
        return f"Error during transcription: {exc}"
# Gradio UI: a single audio-upload input wired to transcribe_audio, with a
# textbox for the resulting text. type="filepath" makes Gradio pass the
# uploaded file's on-disk path (which transcribe_audio checks with
# os.path.exists) rather than raw audio data.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload audio (MP3/WAV)"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio Transcription Pipeline",
    description="Upload an audio file and get a Whisper-small transcription.",
)
if __name__ == "__main__":
    # Bind to all interfaces so the server is reachable from outside a
    # Docker/Spaces container (NOTE: 7860 is Gradio's default port, which
    # Spaces expects to be exposed).
    iface.launch(server_name="0.0.0.0", server_port=7860)