Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,25 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import httpx
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
async def call_api(text: str, audio_path: str):
|
| 6 |
# 读取音频文件
|
|
@@ -20,32 +39,6 @@ async def call_api(text: str, audio_path: str):
|
|
| 20 |
return response.json()["result"]
|
| 21 |
|
| 22 |
|
| 23 |
-
def load_examples():
|
| 24 |
-
return [
|
| 25 |
-
["Can you turn my English into German?", "./show_case/common_voice_en_19664034.mp3"], # En-De
|
| 26 |
-
["Can you identify the initial word that connects to 'currency_name' in this audio clip?",
|
| 27 |
-
"./show_case/audio-1434542201-headset.wav"], # ER
|
| 28 |
-
["What do you think the speaker's message is intended to be in this audio?",
|
| 29 |
-
"./show_case/audio-1434542201-headset.wav"], # IC
|
| 30 |
-
["What does the person say?", "./show_case/p225_002.wav"], # DFake
|
| 31 |
-
["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Fake.wav"], # DFake
|
| 32 |
-
[
|
| 33 |
-
"What emotional weight does the speaker's tone carry?\nPick one answer from A, B, C, and D.\nA: fear\nB: sadness\nC: joy\nD: neutral",
|
| 34 |
-
"./show_case/SER(emotion)_example.wav"], # SER(emotion)
|
| 35 |
-
[
|
| 36 |
-
"Choose the most suitable answer from options A, B, C, and D to respond the question in next line, you may only choose A or B or C or D.\nThe number of speakers delivering this speech is what?\nA. 4\nB. 2\nC.1\nD. 3",
|
| 37 |
-
"./show_case/SNV_example.wav"], # SNV
|
| 38 |
-
["Identify the language of the conversation you just heard.", "./show_case/SLR_example.wav"], # SLR
|
| 39 |
-
["tell the gender of the speaker in this audio.", "./show_case/SGR_018.wav"], # SGR
|
| 40 |
-
["What's the sound we're hearing in this audio from?", "./show_case/Sound_Vocal_example.wav"], # Sound_vocal
|
| 41 |
-
["What is your best guess at the setting of this sound clip?", "./show_case/Scene_example.wav"], # Sound_cochl
|
| 42 |
-
[
|
| 43 |
-
"Choose the most suitable answer from options A, B, C, and D to respond the question in next line, Please think step by step and you may only choose A or B or C or D.\nRecognize the segment where 'project' is spoken by the speaker.\nA. [5.28, 5.39]\nB. [0.92, 1.39]\nC. [4.75, 5.28]\nD. [3.86, 4.23]",
|
| 44 |
-
"./show_case/SG_audio_1.wav"], # SG
|
| 45 |
-
["What type of business does the first person's son have?", "./show_case/SFT_Fisher_example.wav"] # SFT_Fisher
|
| 46 |
-
]
|
| 47 |
-
|
| 48 |
-
|
| 49 |
iface = gr.Interface(
|
| 50 |
fn=call_api,
|
| 51 |
inputs=[
|
|
@@ -53,13 +46,11 @@ iface = gr.Interface(
|
|
| 53 |
gr.Audio(type="filepath", label="Upload Audio", value="./show_case/p225_002.wav")
|
| 54 |
],
|
| 55 |
outputs=gr.Textbox(label="Model output"),
|
| 56 |
-
examples=
|
| 57 |
allow_flagging="never"
|
| 58 |
)
|
| 59 |
|
| 60 |
-
# Add a button to load examples
|
| 61 |
-
iface.add_button("Show Example", load_examples)
|
| 62 |
iface.launch()
|
| 63 |
-
|
| 64 |
if __name__ == '__main__':
|
|
|
|
| 65 |
pass
|
|
|
|
| 1 |
+
# frontend.py
|
| 2 |
import gradio as gr
|
| 3 |
import httpx
|
| 4 |
|
# Demo prompts for the Gradio UI, one entry per supported audio task.
# Each item is [prompt_text, audio_file_path], the shape gr.Interface
# expects for its `examples` argument.
examples = [
    # En-De (speech translation)
    ["Can you turn my English into German?",
     "./show_case/common_voice_en_19664034.mp3"],
    # ER (entity recognition)
    ["Can you identify the initial word that connects to 'currency_name' in this audio clip?",
     "./show_case/audio-1434542201-headset.wav"],
    # IC (intent classification)
    ["What do you think the speaker's message is intended to be in this audio?",
     "./show_case/audio-1434542201-headset.wav"],
    # DFake (transcription of a deepfake sample)
    ["What does the person say?",
     "./show_case/p225_002.wav"],
    # DFake — alternate clip kept for reference:
    # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Real.wav"]
    ["Assess whether this speech's pronunciation is Real or Fake.",
     "./show_case/Fake.wav"],
    # SER (emotion recognition)
    ["What emotional weight does the speaker's tone carry?\nPick one answer from A, B, C, and D.\nA: fear\nB: sadness\nC: joy\nD: neutral",
     "./show_case/SER(emotion)_example.wav"],
    # SVD — disabled example kept for reference:
    # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/SVD_14154_file31512.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav"]
    # SNV (speaker number verification)
    ["Choose the most suitable answer from options A, B, C, and D to respond the question in next line, you may only choose A or B or C or D.\nThe number of speakers delivering this speech is what?\nA. 4\nB. 2\nC.1\nD. 3",
     "./show_case/SNV_example.wav"],
    # SLR (spoken language recognition)
    ["Identify the language of the conversation you just heard.",
     "./show_case/SLR_example.wav"],
    # SGR (speaker gender recognition)
    ["tell the gender of the speaker in this audio.",
     "./show_case/SGR_018.wav"],
    # Sound_vocal (vocal sound classification)
    ["What's the sound we're hearing in this audio from?",
     "./show_case/Sound_Vocal_example.wav"],
    # Sound_cochl (acoustic scene classification)
    ["What is your best guess at the setting of this sound clip?",
     "./show_case/Scene_example.wav"],
    # SG (speech grounding / word-segment localization)
    ["Choose the most suitable answer from options A, B, C, and D to respond the question in next line, Please think step by step and you may only choose A or B or C or D.\nRecognize the segment where 'project' is spoken by the speaker.\nA. [5.28, 5.39]\nB. [0.92, 1.39]\nC. [4.75, 5.28]\nD. [3.86, 4.23]",
     "./show_case/SG_audio_1.wav"],
    # SFT_Fisher (conversational QA)
    ["What type of business does the first person's son have?",
     "./show_case/SFT_Fisher_example.wav"],
]
|
| 22 |
+
|
| 23 |
|
| 24 |
async def call_api(text: str, audio_path: str):
|
| 25 |
    # Read the audio file
|
|
|
|
| 39 |
return response.json()["result"]
|
| 40 |
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
iface = gr.Interface(
|
| 43 |
fn=call_api,
|
| 44 |
inputs=[
|
|
|
|
| 46 |
gr.Audio(type="filepath", label="Upload Audio", value="./show_case/p225_002.wav")
|
| 47 |
],
|
| 48 |
outputs=gr.Textbox(label="Model output"),
|
| 49 |
+
examples=examples,
|
| 50 |
allow_flagging="never"
|
| 51 |
)
|
| 52 |
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test of the backend this UI calls:
    # curl -X POST -F "text=What does the person say?" -F "audio_file=@./test_audio.wav" http://36.151.70.8:30113/process/
    #
    # BUG FIX: the original called iface.launch() unconditionally at module
    # level and left the __main__ guard as a bare `pass`, so importing this
    # module would start the web server. Launch only when run as a script.
    iface.launch()
|