Upload 5 files

- ACLlama_el_s2s.py +1 -1
- app.py +9 -7
ACLlama_el_s2s.py CHANGED

@@ -23,7 +23,7 @@ class ACLlamaConfig(LlamaConfig):
 
 def load_whisper(audio_tower_name, device="cuda"):
     model = WhisperModel.from_pretrained(
-
+        audio_tower_name,torch_dtype=torch.float16,low_cpu_mem_usage=True).to(device)
     model.config.forced_decoder_ids = None
     return model
 
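For reference, the patched `load_whisper` amounts to the self-contained sketch below: `torch_dtype=torch.float16` loads the Whisper weights in half precision and `low_cpu_mem_usage=True` keeps peak host-RAM usage to roughly one copy of the checkpoint before the move to the GPU. The checkpoint name in the usage comment is a placeholder, not taken from this repo.

```python
import torch
from transformers import WhisperModel

def load_whisper(audio_tower_name, device="cuda"):
    # Load the weights directly in fp16 with low_cpu_mem_usage=True, then move
    # the model onto the target device in one step, as the patched line does.
    model = WhisperModel.from_pretrained(
        audio_tower_name, torch_dtype=torch.float16, low_cpu_mem_usage=True).to(device)
    # As in the patched file: clear the forced decoder ids after loading.
    model.config.forced_decoder_ids = None
    return model

# Usage (checkpoint name is a placeholder):
# whisper = load_whisper("openai/whisper-large-v3", device="cuda")
```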
app.py CHANGED

@@ -58,7 +58,7 @@ def process_audio_input(audio):
     return None
 
 @spaces.GPU(duration=180) # use ZeroGPU, 3-minute timeout
-def process_audio_text(audio):
+def process_audio_text(text, audio):
     """Main processing function"""
     global _MODEL_ON_CUDA, inference_model
 
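The `@spaces.GPU` decorator and the `_MODEL_ON_CUDA` global suggest the usual ZeroGPU pattern: the model lives on CPU at startup and is moved to CUDA lazily inside the first GPU-backed call. A minimal hypothetical stub illustrating that pattern (the placeholder model and return value are not from this repo):

```python
import torch
import spaces  # Hugging Face ZeroGPU helper package

# Created at import time, i.e. on CPU: ZeroGPU only attaches a GPU while a
# @spaces.GPU-decorated function is executing.
inference_model = torch.nn.Linear(16, 16)  # placeholder for the real model
_MODEL_ON_CUDA = False

@spaces.GPU(duration=180)  # request a GPU for up to 180 seconds per call
def process_audio_text(text, audio):
    """Main processing function (stub)."""
    global _MODEL_ON_CUDA, inference_model
    if not _MODEL_ON_CUDA:
        # First GPU-backed call: move the model to CUDA once and remember it.
        inference_model = inference_model.to("cuda")
        _MODEL_ON_CUDA = True
    features = torch.zeros(1, 16, device="cuda")
    return f"text={text!r}, audio={audio}: {inference_model(features).sum().item():.3f}"
```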
@@ -123,23 +123,25 @@ init_model()
 
 if __name__ == "__main__":
     examples = [
-        ["./show_case/1.wav"],
-        ["./show_case/2.wav"],
+        ["", "./show_case/1.wav"],
+        ["", "./show_case/2.wav"],
     ]
 
     iface = gr.Interface(
         fn=process_audio_text,
         inputs=[
-
+            gr.Textbox(label="Enter text instruction", value=""),
             gr.Audio(type="filepath", label="Upload Audio")
         ],
         outputs=[
-            gr.
-            gr.
+            gr.Textbox(label="Model output"),
+            gr.Audio(label="Streamed Audio", streaming=True, autoplay=True)
         ],
         examples=examples,
+        title="🔊 EchoX Assistant",
+        description="A multimodal AI assistant that understands speech and responds with both text and audio",
         live=False,
         allow_flagging="never"
     )
 
-    iface.launch(server_name="0.0.0.0", server_port=7860, share=
+    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
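Taken together, the app.py changes add a text-instruction input, a text output plus a streamed audio output, and a title/description. A self-contained sketch of the same interface layout, with a stub generator standing in for the real model (the stub, its sine-wave audio, and its reply text are placeholders):

```python
import numpy as np
import gradio as gr

def process_audio_text(text, audio):
    """Stub generator: each yield updates the textbox and streams another audio chunk."""
    sr = 16000
    t = np.arange(sr // 2) / sr
    chunk = (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
    reply = f"Instruction: {text!r}, audio file: {audio}"
    for i in range(1, 4):
        yield reply[: len(reply) * i // 3], (sr, chunk)

iface = gr.Interface(
    fn=process_audio_text,
    inputs=[
        gr.Textbox(label="Enter text instruction", value=""),
        gr.Audio(type="filepath", label="Upload Audio"),
    ],
    outputs=[
        gr.Textbox(label="Model output"),
        gr.Audio(label="Streamed Audio", streaming=True, autoplay=True),
    ],
    title="🔊 EchoX Assistant",
    description="A multimodal AI assistant that understands speech and responds with both text and audio",
    live=False,
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
```

The `examples` entries from the diff are omitted here because the ./show_case/*.wav files only exist in the Space's repository.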