fdaudens committed on
Commit
0ad12f7
·
verified ·
1 Parent(s): 2027366

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -25
app.py CHANGED
@@ -1,44 +1,46 @@
1
- with open("app.py", "w") as f:
2
- f.write("""
3
  import gradio as gr
 
4
  from transformers import pipeline
5
- import os
6
 
7
- print("🎤 Loading transcription pipeline...")
8
- try:
9
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small", device="cuda:0")
10
- except Exception as e:
11
- print(f"Could not load model on GPU: {e}. Trying on CPU.")
12
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small", device="cpu")
 
 
 
 
 
 
13
 
 
14
 
15
  def transcribe_audio(audio_file_path):
16
- if audio_file_path is None:
17
  return "Please upload an audio file."
18
-
19
  if not os.path.exists(audio_file_path):
20
- return f"Error: Audio file not found at {audio_file_path}"
21
 
22
- print(f" transcribe the audio file: {audio_file_path}")
23
  try:
24
- transcription_result = transcriber(audio_file_path, chunk_length_s=30, return_timestamps=True)
25
- return transcription_result["text"]
 
 
26
  except Exception as e:
27
  return f"Error during transcription: {e}"
28
 
29
-
30
- print("🚀 Creating Gradio interface for Transcription...")
31
  iface = gr.Interface(
32
  fn=transcribe_audio,
33
- inputs=gr.Audio(type="filepath", label="Upload Audio File"),
34
  outputs=gr.Textbox(label="Transcription"),
35
  title="Audio Transcription Pipeline",
36
- description="Upload an audio file (e.g., MP3, WAV) to get a transcription."
37
  )
38
 
39
- print("✨ Launching Gradio interface...")
40
- iface.launch()
41
- print("\n✅ Gradio interface launched.")
42
- """)
43
-
44
- print("✅ Saved Gradio app code to app.py")
 
1
+ import os
 
2
  import gradio as gr
3
+ import torch
4
  from transformers import pipeline
 
5
 
6
MODEL_ID = "openai/whisper-small"


def load_asr():
    """Construct the Whisper speech-recognition pipeline.

    Prefers the first CUDA device when one is available; otherwise runs on
    CPU. For transformers pipelines the ``device`` argument is an integer
    CUDA index, with ``-1`` meaning CPU.

    Returns:
        The ready-to-call ASR pipeline object.
    """
    target = 0 if torch.cuda.is_available() else -1
    print(f"🎤 Loading transcription pipeline on {'GPU' if target == 0 else 'CPU'}...")
    return pipeline(
        task="automatic-speech-recognition",
        model=MODEL_ID,
        device=target,
    )


# Module-level pipeline shared by every request.
asr = load_asr()
20
 
21
def transcribe_audio(audio_file_path):
    """Transcribe one audio file with the module-level ``asr`` pipeline.

    Args:
        audio_file_path: Filesystem path to the uploaded audio, or a falsy
            value when nothing was uploaded.

    Returns:
        The transcription text, or a human-readable error message string.
    """
    # Guard clauses: no upload, or a path that no longer exists on disk.
    if not audio_file_path:
        return "Please upload an audio file."
    if not os.path.exists(audio_file_path):
        return f"Error: file not found at {audio_file_path}"

    print(f" Transcribing: {audio_file_path}")
    try:
        # chunk_length_s works with Whisper in transformers; the result is a
        # dict carrying "text" and possibly "chunks".
        output = asr(audio_file_path, chunk_length_s=30, return_timestamps=True)
        text = output.get("text", "").strip()
        return text if text else "(No text recognized)"
    except Exception as e:
        return f"Error during transcription: {e}"
35
 
 
 
36
# Gradio UI: a single audio upload in, the transcription text out.
_audio_in = gr.Audio(type="filepath", label="Upload audio (MP3/WAV)")
_text_out = gr.Textbox(label="Transcription")
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_in,
    outputs=_text_out,
    title="Audio Transcription Pipeline",
    description="Upload an audio file and get a Whisper-small transcription.",
)
43
 
44
if __name__ == "__main__":
    # Bind to every interface so the server is reachable from outside the
    # container (Docker / Hugging Face Spaces).
    host, port = "0.0.0.0", 7860
    iface.launch(server_name=host, server_port=port)