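"""Gradio app that transcribes uploaded audio with a Hugging Face Whisper ASR pipeline."""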
import os
import gradio as gr
import torch
from transformers import pipeline

MODEL_ID = "openai/whisper-small"
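# Other Whisper checkpoints (e.g. "openai/whisper-base", "openai/whisper-medium") are drop-in replacements.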

def load_asr():
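    """Build a transformers ASR pipeline, preferring GPU when available."""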
    # Prefer GPU if available, else CPU. For transformers pipelines:
    # device: int index for CUDA, or -1 for CPU.
    device = 0 if torch.cuda.is_available() else -1
    print(f"🎤 Loading transcription pipeline on {'GPU' if device == 0 else 'CPU'}...")
    return pipeline(
        task="automatic-speech-recognition",
        model=MODEL_ID,
        device=device
    )

asr = load_asr()

def transcribe_audio(audio_file_path):
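    """Transcribe the audio at `audio_file_path`, returning text or an error message."""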
    if not audio_file_path:
        return "Please upload an audio file."
    if not os.path.exists(audio_file_path):
        return f"Error: file not found at {audio_file_path}"

    print(f"→ Transcribing: {audio_file_path}")
    try:
        # chunk_length_s works with Whisper in transformers
        result = asr(audio_file_path, chunk_length_s=30, return_timestamps=True)
        # result is a dict with "text" and possibly "chunks"
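        # e.g. {"text": "full transcript", "chunks": [{"timestamp": (0.0, 5.3), "text": "..."}]}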
        return result.get("text", "").strip() or "(No text recognized)"
    except Exception as e:
        return f"Error during transcription: {e}"

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload audio (MP3/WAV)"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio Transcription Pipeline",
    description="Upload an audio file and get a Whisper-small transcription.",
)

if __name__ == "__main__":
    # Bind to all interfaces for Docker/Spaces
    iface.launch(server_name="0.0.0.0", server_port=7860)