Spaces:

pritamdeka
/

Whisper-Audio-Transcriber-Summarizer

Sleeping

pritamdeka committed on Oct 8, 2024

Commit

4eaea04

verified ·

1 Parent(s): 4206062

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,14 +1,9 @@
 import gradio as gr
-import torch
 import whisper
 import librosa
-from transformers import pipeline
-# Check if DistilWhisper is available on Hugging Face
-# This is a placeholder model name, update it with an actual distillation model if available
-# distil_whisper_model = "huggingface/distil-whisper-model"
-# If no distil version, load smaller Whisper model for speed (e.g., "base" or "tiny")
 model = whisper.load_model("tiny")
 # Chunking function to split the audio into smaller parts (e.g., 5-second chunks)
@@ -38,16 +33,15 @@ def transcribe_audio_in_chunks(audio_file):
     if audio_file is None:
         return "No audio file provided."
-    # Check the audio file path
-    if not os.path.exists(audio_file):
-        return "The audio file does not exist or is inaccessible."
     # Chunk the audio into 5-second parts
     chunks, sr = chunk_audio(audio_file, chunk_size=5)
     # Process each chunk and append the results as real-time transcription
     transcription = ""
     for i, chunk in enumerate(chunks):
         # Transcribe each chunk
         result = model.transcribe(chunk)
         transcription += f"Chunk {i + 1}: {result['text']}\n"

 import gradio as gr
 import whisper
 import librosa
+import numpy as np
+# Load Whisper model (using tiny for faster performance)
 model = whisper.load_model("tiny")
 # Chunking function to split the audio into smaller parts (e.g., 5-second chunks)
     if audio_file is None:
         return "No audio file provided."
     # Chunk the audio into 5-second parts
     chunks, sr = chunk_audio(audio_file, chunk_size=5)
     # Process each chunk and append the results as real-time transcription
     transcription = ""
     for i, chunk in enumerate(chunks):
+        # Convert the chunk into the correct format for Whisper (numpy array of floats)
+        chunk = np.array(chunk)
         # Transcribe each chunk
         result = model.transcribe(chunk)
         transcription += f"Chunk {i + 1}: {result['text']}\n"