Update app.py
app.py CHANGED
@@ -1,3 +1,9 @@
+import streamlit as st
+import torch
+import base64
+import tempfile
+import os
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 
 # Setup model
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -19,3 +25,90 @@ def load_model():
         torch_dtype=torch_dtype,
         device=device,
     )
+
+asr_pipeline = load_model()
+
+st.title("Swedish Speech-to-Text Demo")
+
+# Audio Upload Option
+uploaded_file = st.file_uploader("Ladda upp en ljudfil", type=["wav", "mp3", "flac"])
+
+# JavaScript for recording audio
+audio_recorder_js = """
+<script>
+let mediaRecorder;
+let audioChunks = [];
+let isRecording = false;
+
+function startRecording() {
+    if (!isRecording) {
+        isRecording = true;
+        navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
+            mediaRecorder = new MediaRecorder(stream);
+            audioChunks = [];
+            mediaRecorder.ondataavailable = event => {
+                audioChunks.push(event.data);
+            };
+            mediaRecorder.onstop = () => {
+                const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+                const reader = new FileReader();
+                reader.readAsDataURL(audioBlob);
+                reader.onloadend = () => {
+                    const base64Audio = reader.result.split(',')[1];
+                    fetch('/save_audio', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ audio: base64Audio })
+                    }).then(response => response.json()).then(data => {
+                        console.log(data);
+                        window.location.reload();
+                    });
+                };
+            };
+            mediaRecorder.start();
+        });
+    }
+}
+
+function stopRecording() {
+    if (isRecording) {
+        isRecording = false;
+        mediaRecorder.stop();
+    }
+}
+</script>
+
+<button onclick="startRecording()">🎤 Starta inspelning</button>
+<button onclick="stopRecording()">⏹️ Stoppa inspelning</button>
+"""
+
+st.components.v1.html(audio_recorder_js)
+
+# Processing audio input (uploaded file or recorded)
+audio_path = None
+
+if uploaded_file is not None:
+    # Save uploaded file to a temp location
+    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[-1]) as temp_audio:
+        temp_audio.write(uploaded_file.read())
+        audio_path = temp_audio.name
+
+elif "audio_data" in st.session_state and st.session_state["audio_data"]:
+    # Decode base64 audio from JavaScript recording
+    audio_bytes = base64.b64decode(st.session_state["audio_data"])
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+        temp_audio.write(audio_bytes)
+        audio_path = temp_audio.name
+
+# Transcribe if we have audio
+if audio_path:
+    st.audio(audio_path, format="audio/wav")
+
+    with st.spinner("Transkriberar..."):
+        transcription = asr_pipeline(audio_path)["text"]
+
+    st.subheader("📜 Transkription:")
+    st.write(transcription)
+
+    # Cleanup temp file
+    os.remove(audio_path)
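For context, the second hunk header references def load_model(), whose body is only partially visible here (the torch_dtype= and device= arguments and the closing parenthesis). A minimal sketch of what such a loader typically looks like given the imports added above; the checkpoint id and the caching decorator are assumptions, not values taken from this commit, and the real function may instead build the model with AutoModelForSpeechSeq2Seq and AutoProcessor before handing them to pipeline:

    import streamlit as st
    import torch
    from transformers import pipeline

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    @st.cache_resource  # assumption: cache so the model is loaded once per session
    def load_model():
        # "<swedish-asr-checkpoint>" is a placeholder; the actual model id is set
        # earlier in app.py and is not shown in this diff.
        return pipeline(
            "automatic-speech-recognition",
            model="<swedish-asr-checkpoint>",
            torch_dtype=torch_dtype,
            device=device,
        )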
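One caveat about the recording path: st.components.v1.html embeds the markup one-way, and a plain Streamlit server does not expose a /save_audio endpoint, so the fetch call in the script has nowhere to post and st.session_state["audio_data"] is never populated by this snippet alone. A hedged alternative sketch, assuming a Streamlit release that ships the built-in st.audio_input widget (not part of this commit):

    import tempfile
    import streamlit as st

    # st.audio_input returns the recording as a WAV file-like object, or None.
    recorded = st.audio_input("Spela in ljud")
    if recorded is not None:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(recorded.read())
            audio_path = temp_audio.name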
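Finally, the transcription call: the transformers ASR pipeline accepts a file path and returns a dict with a "text" key, which is what asr_pipeline(audio_path)["text"] relies on. For recordings longer than roughly 30 seconds, Whisper-style checkpoints generally need chunked inference; a sketch of that call, with illustrative parameter values that are not taken from this commit:

    result = asr_pipeline(
        audio_path,
        chunk_length_s=30,        # process long audio in 30-second windows
        return_timestamps=False,  # set True to also receive segment timestamps
    )
    transcription = result["text"]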