Update app.py
app.py CHANGED
@@ -3,11 +3,11 @@ import time
 import gradio as gr
 import librosa
 import numpy as np
-import soundfile as sf
+# import soundfile as sf
 from transformers import pipeline

 TARGET_SAMPLE_RATE = 16_000
-AUDIO_SECONDS_THRESHOLD =
+AUDIO_SECONDS_THRESHOLD = 2
 pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
 prediction = [{"score": 1, "label": "recording..."}]

@@ -27,7 +27,7 @@ def streaming_recording_fn(stream, new_chunk):
     if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
         prediction = pipe(stream)
         file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
-        sf.write(file_name, stream, TARGET_SAMPLE_RATE)
+        # # sf.write(file_name, stream, TARGET_SAMPLE_RATE)
         print(f"SAVE AUDIO: {file_name}")
         print(f">>>>>>1\t{y.shape=}, {stream.shape=}\n\t{prediction[0]=}")
         stream = None
@@ -49,7 +49,7 @@ def microphone_fn(waveform):
     y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
     result = pipe(y)
     file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
-    sf.write(file_name, y, TARGET_SAMPLE_RATE)
+    # sf.write(file_name, y, TARGET_SAMPLE_RATE)
     return {i['label']: i['score'] for i in result}


@@ -61,7 +61,7 @@ def file_fn(waveform):
     y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
     result = pipe(y)
     file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
-    sf.write(file_name, y, TARGET_SAMPLE_RATE)
+    # sf.write(file_name, y, TARGET_SAMPLE_RATE)
    return {i['label']: i['score'] for i in result}


@@ -98,7 +98,7 @@ with gr.Blocks() as example:

 with gr.Blocks() as demo:
     gr.TabbedInterface([file_demo, streaming_demo, microphone_demo, example],
-                       ["Audio file", "Streaming", "Microphone", "
+                       ["Audio file", "Streaming", "Microphone", "Example"])

 if __name__ == "__main__":

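
For context, a minimal sketch of the streaming handler these diffed lines sit in, assuming the usual Gradio streaming-audio callback signature (state plus a (sample_rate, samples) chunk). Only the lines visible in the diff are taken from the Space; the chunk normalisation, accumulation, and return value are assumptions, not the Space's actual code.

# Hypothetical reconstruction of streaming_recording_fn around the diffed lines;
# follows the common Gradio streaming-audio pattern, not necessarily this Space.
import time

import librosa
import numpy as np
from transformers import pipeline

TARGET_SAMPLE_RATE = 16_000
AUDIO_SECONDS_THRESHOLD = 2
pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
prediction = [{"score": 1, "label": "recording..."}]


def streaming_recording_fn(stream, new_chunk):
    global prediction
    sr, y = new_chunk                      # Gradio delivers (sample_rate, int16 array)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak                          # normalise to [-1, 1]
    y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
    stream = y if stream is None else np.concatenate([stream, y])

    if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
        prediction = pipe(stream)
        file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
        # sf.write(file_name, stream, TARGET_SAMPLE_RATE)  # disabled by this commit
        print(f"SAVE AUDIO: {file_name}")
        stream = None                      # reset the buffer after each classification

    return stream, {p["label"]: p["score"] for p in prediction}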
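
The microphone and file handlers share the same resample-then-classify path. A quick way to sanity-check that path outside Gradio, with an illustrative local file name:

import librosa
from transformers import pipeline

pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")

y, sr = librosa.load("example.wav", sr=None, mono=True)   # any local clip
y = librosa.resample(y, orig_sr=sr, target_sr=16_000)     # the AST checkpoint expects 16 kHz input
print({i["label"]: i["score"] for i in pipe(y)})          # top AudioSet labels with scores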