Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC | |
| import torch | |
| import numpy as np | |
| import soundfile as sf | |
| import io | |
# Page header: the app title followed by a one-line usage hint.
st.title("Syllables per Second Calculator")
st.write(
    "Upload an audio file to calculate the number of 'p', 't', and 'k' "
    "syllables per second."
)
# Checkpoint used for both the processor and the CTC model.
_MODEL_NAME = "facebook/wav2vec2-xlsr-53-espeak-cv-ft"

# Decoded characters that are counted as one "syllable" each.
_TARGET_CHARS = frozenset({"p", "t", "k"})


def _to_mono(samples):
    """Average all channels of (frames, channels) audio into one channel."""
    if samples.ndim > 1:
        return samples.mean(axis=1)
    return samples


def _resample_linear(samples, source_rate, target_rate):
    """Resample 1-D audio from source_rate to target_rate by linear interpolation."""
    if source_rate == target_rate or samples.size == 0:
        return samples
    target_length = max(1, int(round(samples.size * target_rate / source_rate)))
    source_times = np.arange(samples.size) / source_rate
    target_times = np.arange(target_length) / target_rate
    # NOTE(review): linear interpolation applies no anti-alias filter; swap in
    # a proper resampler if recognition quality on high-rate uploads matters.
    return np.interp(target_times, source_times, samples).astype(np.float32)


def get_syllables_per_second(audio_file):
    """Return how many 'p', 't' and 'k' characters are decoded per second.

    The audio is read with soundfile, mixed down to mono, resampled to the
    sampling rate the feature extractor declares, and decoded greedily with
    the wav2vec2 CTC model. Every decoded character equal to 'p', 't' or 'k'
    counts once; the count is divided by the duration of the original audio.

    Args:
        audio_file: Binary file-like object with a ``read()`` method whose
            bytes soundfile can decode (the caller in this file passes a
            Streamlit upload restricted to WAV).

    Returns:
        The count per second as a float; ``0.0`` when the file holds no
        audio frames.

    Raises:
        Whatever ``soundfile.read`` raises for undecodable data, and whatever
        transformers/torch raise while loading or running the model.
    """
    audio_input, sample_rate = sf.read(
        io.BytesIO(audio_file.read()), dtype="float32"
    )
    # Any multi-channel layout is averaged, not only stereo; a 2-D array
    # must never reach the processor as if it were a single waveform.
    audio_input = _to_mono(audio_input)

    audio_duration = len(audio_input) / sample_rate
    if audio_duration == 0:
        # Nothing to decode, and dividing by the duration would fail.
        return 0.0

    # Loaded only after the input is known to be usable.
    processor = Wav2Vec2Processor.from_pretrained(_MODEL_NAME)
    model = Wav2Vec2ForCTC.from_pretrained(_MODEL_NAME)

    # Feed the model audio at the rate it declares, and pass that rate
    # explicitly so a mismatch cannot go unnoticed.
    model_rate = processor.feature_extractor.sampling_rate
    model_audio = _resample_linear(audio_input, sample_rate, model_rate)
    input_values = processor(
        model_audio, sampling_rate=model_rate, return_tensors="pt"
    ).input_values

    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    transcription = processor.batch_decode(predicted_ids, output_char_offsets=True)
    offsets = transcription["char_offsets"]
    syllable_count = sum(
        1 for item in offsets[0] if item["char"] in _TARGET_CHARS
    )
    return syllable_count / audio_duration
# Upload widget; only files with the "wav" extension are offered.
uploaded_file = st.file_uploader(label="Choose an audio file", type=["wav"])

# Nothing is computed until the user has actually provided a file.
if uploaded_file is not None:
    # Keep a spinner on screen while the analysis is running.
    with st.spinner(text="Processing the audio file..."):
        result = get_syllables_per_second(uploaded_file)
    # NOTE(review): the original indentation was lost; the result is written
    # here once the spinner has closed - confirm that matches the intent.
    st.write("Syllables per second: ", result)