|
|
import logging
import os
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile
import sounddevice as sd
from groq import Groq
|
|
|
|
|
class Recorder:
    """Toggle-style microphone recorder built on a ``sounddevice`` input stream.

    Audio blocks delivered by the stream callback are buffered in memory
    while recording is active; :meth:`save_audio` writes the buffered blocks
    to a temporary WAV file.
    """

    def __init__(self, sample_rate=44100, channels=2):
        """Create an idle recorder.

        Args:
            sample_rate: Sampling rate in Hz, used both for the input stream
                and for the WAV file that :meth:`save_audio` writes.
            channels: Number of input channels requested from the device.
                Defaults to 2, the value that used to be hard-coded; pass 1
                for devices that only offer a mono input.
        """
        self.recording = False
        self.frames = []
        self.sample_rate = sample_rate
        self.channels = channels
        self.stream = None

    def toggle_recording(self):
        """Start recording when idle, stop when recording.

        Returns:
            The status text for the new state: ``"Recording... Press to Stop"``
            after starting, ``"Recording stopped. Press to Record"`` after
            stopping.

        Raises:
            Whatever ``sounddevice`` raises when the stream cannot be opened,
            started or stopped. A stream that fails to start is closed and
            the recorder is left idle.
        """
        if self.recording:
            stream, self.stream = self.stream, None
            try:
                if stream is not None:
                    try:
                        stream.stop()
                    finally:
                        stream.close()
            finally:
                self.recording = False
            return "Recording stopped. Press to Record"

        self.frames = []
        stream = sd.InputStream(
            callback=self.callback,
            channels=self.channels,
            samplerate=self.sample_rate,
        )
        # Raise the flag *before* starting the stream: the callback drops
        # every block that arrives while ``recording`` is False, so setting
        # it afterwards would lose the first blocks of audio.
        self.recording = True
        try:
            stream.start()
        except Exception:
            self.recording = False
            stream.close()
            raise
        self.stream = stream
        return "Recording... Press to Stop"

    def callback(self, indata, frames, time, status):
        """Stream callback: buffer a copy of each incoming block while recording.

        The parameters follow the ``sounddevice`` stream callback signature.

        Args:
            indata: Array holding the current block of input samples.
            frames: Number of frames in ``indata`` (unused).
            time: Timing information supplied by the stream (unused).
            status: Status flags for this block; logged when set, since they
                indicate samples were lost or delayed.
        """
        if status:
            logging.getLogger(__name__).warning("Audio input status: %s", status)
        if self.recording:
            # Store a copy: the array handed to the callback belongs to the
            # stream and must not be kept past the call (see sounddevice docs).
            self.frames.append(indata.copy())

    def save_audio(self):
        """Write the buffered audio to a temporary WAV file.

        Returns:
            The path of the newly created ``.wav`` file, or ``None`` when no
            audio has been buffered. The file is not deleted automatically;
            the caller owns it.
        """
        if not self.frames:
            return None

        audio_data = np.concatenate(self.frames, axis=0)
        # Only reserve a unique file name here; the handle is closed again
        # before SciPy opens the path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
            wav_path = temp_wav_file.name
        try:
            wavfile.write(wav_path, self.sample_rate, audio_data)
        except Exception:
            # Do not leave an empty or partial temp file behind on failure.
            os.remove(wav_path)
            raise
        return wav_path
|
|
|
|
|
# Single module-level recorder shared by the record() and transcribe() handlers,
# so the audio buffered by one click is visible to the next.
recorder = Recorder()
|
|
|
|
|
def record():
    """Flip the shared recorder between idle and recording.

    Returns:
        The status text returned by ``recorder.toggle_recording()``, which
        the UI shows as the button label.
    """
    button_label = recorder.toggle_recording()
    return button_label
|
|
|
|
|
def transcribe():
    """Transcribe the audio buffered by the shared recorder via Groq Whisper.

    The buffered audio is written to a temporary WAV file, uploaded, and the
    temporary file is removed again whether or not the request succeeds.

    Returns:
        The transcribed text; ``"Transcription text not found."`` when the
        response has no ``text`` attribute; ``"No audio recorded."`` when the
        recorder has nothing buffered.

    Raises:
        Whatever the Groq client raises, e.g. when no API key is configured
        or the request fails.
    """
    audio_file = recorder.save_audio()
    if not audio_file:
        return "No audio recorded."

    try:
        # The API key comes from the environment, never from source code.
        # NOTE(review): a key was previously hard-coded on this line and has
        # therefore been exposed in the file history; it should be revoked.
        client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        with open(audio_file, "rb") as file:
            audio_bytes = file.read()
        transcription = client.audio.transcriptions.create(
            file=(audio_file, audio_bytes),
            model="whisper-large-v3",
            # NOTE(review): looks like a placeholder copied from the API
            # example; confirm whether a real context prompt is intended.
            prompt="Specify context or spelling",
            response_format="json",
            language="en",
            temperature=0.0,
        )
    finally:
        # Always clean up the temporary recording, even if the upload failed.
        os.remove(audio_file)

    logging.getLogger(__name__).debug("Transcription response: %s", transcription)

    if hasattr(transcription, "text"):
        return transcription.text
    return "Transcription text not found."
|
|
|
|
|
with gr.Blocks() as gradio_interface:

    def _transcribe_when_stopped():
        """Transcribe once the recorder is idle; otherwise leave the textbox as is."""
        if recorder.recording:
            # This click started a recording, so there is no finished take
            # to transcribe yet; an empty update leaves the output untouched.
            return gr.update()
        return transcribe()

    with gr.Column():
        record_button = gr.Button("Press to Record")
        transcription_output = gr.Textbox(label="Transcription")

    # Chain the two steps instead of registering two independent click
    # listeners: separate listeners are both fired by the same click, so the
    # transcription could read the buffer before or while the recorder was
    # being toggled. With .then() it runs strictly after the toggle finished.
    record_button.click(fn=record, outputs=record_button).then(
        fn=_transcribe_when_stopped,
        outputs=transcription_output,
    )


if __name__ == "__main__":
    gradio_interface.launch()