ankandrew committed on
Commit
35d2caa
·
verified ·
1 Parent(s): 9d8db21

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ import librosa
5
+
6
+ from torchmetrics.functional.audio.nisqa import non_intrusive_speech_quality_assessment as tm_nisqa
7
+
8
SR = 16000  # Sample rate (Hz) every clip is converted to before scoring.


def _as_mono_float32(samples):
    """Return *samples* as a non-empty 1-D float32 waveform.

    Signed-integer PCM is scaled by the dtype's full-scale value so it lands
    in the same [-1, 1) range that ``librosa.load`` produces for files;
    2-D input is averaged over its second axis.

    Raises:
        ValueError: if *samples* contains no data.
    """
    arr = np.asarray(samples)
    if arr.size == 0:
        raise ValueError("Audio input is empty.")

    if np.issubdtype(arr.dtype, np.signedinteger):
        # e.g. int16 -> divide by 32768 so both input paths share one scale.
        arr = arr.astype(np.float32) / float(-np.iinfo(arr.dtype).min)
    else:
        arr = arr.astype(np.float32)

    if arr.ndim > 1:
        # NOTE(review): assumes the (samples, channels) layout Gradio uses
        # for numpy audio; downmix BEFORE resampling, otherwise
        # librosa.resample would operate along the channel axis.
        arr = arr.mean(axis=1)
    return arr


def predict_nisqa(audio):
    """Score a speech clip with NISQA and return the results as a table.

    Args:
        audio: Either a path to an audio file, or a ``(sample_rate, samples)``
            tuple holding raw samples.

    Returns:
        A dict describing a two-column table. ``"Metric"`` and ``"Score"``
        hold the columns; ``"headers"`` and ``"data"`` hold the same table
        row-wise, which is the dict layout ``gr.Dataframe`` documents.
        Scores are rounded to 3 decimals.

    Raises:
        ValueError: if no audio was supplied, the audio is empty, or the
            model does not return exactly five scores.
    """
    if audio is None:
        raise ValueError("No audio provided. Please upload or record a clip first.")

    if isinstance(audio, tuple):
        orig_sr, samples = audio
        y = _as_mono_float32(samples)
        if orig_sr != SR:
            y = librosa.resample(y, orig_sr=orig_sr, target_sr=SR)
    else:
        y, _ = librosa.load(audio, sr=SR, mono=True)
        if y.size == 0:
            raise ValueError("Audio input is empty.")

    wav = torch.tensor(y, dtype=torch.float32)

    scores = tm_nisqa(wav, SR).detach().cpu().tolist()
    # Unpacking doubles as a check that exactly five values came back.
    mos, noisiness, discontinuity, coloration, loudness = scores

    metrics = ["MOS (overall)", "Noisiness", "Discontinuity", "Coloration", "Loudness"]
    rounded = [
        round(value, 3)
        for value in (mos, noisiness, discontinuity, coloration, loudness)
    ]

    return {
        # Column-oriented keys kept for backward compatibility.
        "Metric": metrics,
        "Score": rounded,
        # Row-oriented keys: the dict form gr.Dataframe documents.
        "headers": ["Metric", "Score"],
        "data": [[name, score] for name, score in zip(metrics, rounded)],
    }
28
+
29
# Gradio UI: one audio input, a Predict button, and a results table.
# NOTE(review): indentation was lost in the pasted diff, so the nesting under
# gr.Row() below is reconstructed — confirm against the real app.py.
with gr.Blocks(title="NISQA Speech Quality (MOS) Demo") as demo:
    gr.Markdown(
        """
        # 🎧 NISQA Speech Quality (MOS)
        Upload or record speech and get **MOS + quality dimensions**.
        Uses NISQA v2.0 via TorchMetrics.
        """
    )
    with gr.Row():
        # "microphone" (not "mic") is the source name gr.Audio accepts;
        # type="filepath" means the callback receives a path string.
        audio = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Input audio (wav/mp3/m4a...)",
        )
    btn = gr.Button("Predict")
    out = gr.Dataframe(headers=["Metric", "Score"], label="Results", interactive=False)

    # Run the scorer on click and show its table in the Dataframe.
    btn.click(fn=predict_nisqa, inputs=audio, outputs=out)

if __name__ == "__main__":
    demo.launch()