import numpy as np
import tensorflow as tf
import gradio as gr
from huggingface_hub import from_pretrained_keras

# Class names match the folder names of the 16000_pcm_speeches dataset
# (including the dataset's original misspellings).
classes_names = ['Benjamin_Netanyau', 'Jens_Stoltenberg', 'Julia_Gillard', 'Magaret_Tarcher', 'Nelson_Mandela']

# Percentage of samples to use for validation
# VALID_SPLIT = 0.1

# Seed to use when shuffling the dataset and the noise
# SHUFFLE_SEED = 43

# The sampling rate to use.
# This is the one used in all of the audio samples.
# We will resample all of the noise to this sampling rate.
# This will also be the output size of the audio wave samples
# (since all samples are 1 second long).
SAMPLING_RATE = 16000

# The factor to multiply the noise with, according to:
#   noisy_sample = sample + noise * prop * scale
# where prop = sample_amplitude / noise_amplitude
# SCALE = 0.5

# Leftover test-set pipeline from the training notebook, kept for reference:
# test_ds = paths_and_labels_to_dataset(valid_audio_paths, valid_labels)
# test_ds = test_ds.shuffle(buffer_size=BATCH_SIZE * 8, seed=SHUFFLE_SEED).batch(
#     BATCH_SIZE
# )
# test_ds = test_ds.map(lambda x, y: (add_noise(x, noises, scale=SCALE), y))
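
# The commented pipeline above calls an add_noise helper that is defined in
# the keras.io notebook this Space is based on, not in this file. A sketch of
# that helper is reproduced below for reference; it assumes `noises` is a
# float tensor of noise clips with the same per-clip shape as the audio batch.
def add_noise(audio, noises=None, scale=0.5):
    if noises is not None:
        # Pick a random noise clip for every sample in the batch.
        tf_rnd = tf.random.uniform(
            (tf.shape(audio)[0],), 0, noises.shape[0], dtype=tf.int32
        )
        noise = tf.gather(noises, tf_rnd, axis=0)
        # Rescale the noise to a fixed proportion of each sample's amplitude.
        prop = tf.math.reduce_max(audio, axis=1) / tf.math.reduce_max(noise, axis=1)
        prop = tf.repeat(tf.expand_dims(prop, axis=1), tf.shape(audio)[1], axis=1)
        audio = audio + noise * prop * scale
    return audio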

model = from_pretrained_keras("keras-io/speaker-recognition")

def path_to_audio(path):
    """Reads and decodes an audio file as a mono, 1-second, 16 kHz waveform."""
    audio = tf.io.read_file(path)
    audio, _ = tf.audio.decode_wav(audio, desired_channels=1, desired_samples=SAMPLING_RATE)
    return audio
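
# Illustrative: for the dataset's 1-second clips,
# path_to_audio("audios/260.wav").shape -> TensorShape([16000, 1])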

def audio_to_fft(audio):
    # Since tf.signal.fft applies the FFT on the innermost dimension,
    # we need to squeeze the channel dimension and then expand it again
    # after the FFT.
    audio = tf.squeeze(audio, axis=-1)
    fft = tf.signal.fft(
        tf.cast(tf.complex(real=audio, imag=tf.zeros_like(audio)), tf.complex64)
    )
    fft = tf.expand_dims(fft, axis=-1)
    # Return the absolute value of the first half of the FFT,
    # which represents the positive frequencies.
    return tf.math.abs(fft[:, : (audio.shape[1] // 2), :])
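
# Illustrative sanity check: a batch of 1-second clips at 16 kHz maps to the
# 8000 positive-frequency bins per clip.
# dummy = tf.zeros((2, SAMPLING_RATE, 1))
# audio_to_fft(dummy).shape  # -> TensorShape([2, 8000, 1])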

def predict(actual_audio_path, actual_label):
    decoded_audio = path_to_audio(actual_audio_path)
    # Add a batch dimension: (16000, 1) -> (1, 16000, 1).
    actual_audio = tf.expand_dims(decoded_audio, axis=0)
    # Get the signal's FFT features.
    ffts = audio_to_fft(actual_audio)
    # Predict the speaker.
    y_pred = model.predict(ffts)
    y_pred = np.argmax(y_pred, axis=-1)
    return classes_names[y_pred[0]], actual_audio_path
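
# Example of calling predict directly (outside Gradio), using one of the
# bundled example clips:
# speaker, audio_path = predict("audios/260.wav", "Benjamin_Netanyau")
# print(speaker)  # e.g. "Benjamin_Netanyau"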

# The app takes one audio sample to be recognised, plus the actual speaker's
# name for comparison.
inputs = [
    gr.Audio(sources=["upload"], type="filepath", label="Take audio sample"),
    gr.Textbox(label="Actual Speaker"),
]
# The app outputs the predicted speaker's name and plays back the input audio.
outputs = [
    gr.Textbox(label="Predicted Speaker"),
    gr.Audio(label="Corresponding Audio"),
]

# It's good practice to pass examples, a description, and a title to guide users.
examples = [
    ['audios/260.wav', 'Benjamin_Netanyau'],
    ['audios/611.wav', 'Jens_Stoltenberg'],
    ['audios/65.wav', 'Julia_Gillard'],
    ['audios/1083.wav', 'Magaret_Tarcher'],
    ['audios/605.wav', 'Nelson_Mandela'],
]

title = "Speaker Recognition"
description = ("Select a noisy audio sample from the examples to check whether "
               "the model recognises the speaker correctly even in the presence of noise.")

gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    allow_flagging="never",
    analytics_enabled=False,
    title=title,
    description=description,
    article=(
        "Space By: <u><a href='https://github.com/robotjellyzone'><b>Kavya Bisht</b></a></u>"
        " \n Based on <a href='https://keras.io/examples/audio/speaker_recognition_using_cnn/'>"
        "<b>this notebook</b></a>"
    ),
).launch(debug=True)