Upload 5 files

- ACLlama_el_s2s.py +1 -1
- app.py +9 -7
ACLlama_el_s2s.py CHANGED

@@ -23,7 +23,7 @@ class ACLlamaConfig(LlamaConfig):
 
 def load_whisper(audio_tower_name, device="cuda"):
     model = WhisperModel.from_pretrained(
-
+        audio_tower_name,torch_dtype=torch.float16,low_cpu_mem_usage=True).to(device)
     model.config.forced_decoder_ids = None
     return model
 
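For reference, the patched `load_whisper` amounts to the self-contained sketch below: `torch_dtype=torch.float16` loads the Whisper weights in half precision and `low_cpu_mem_usage=True` keeps peak host-RAM usage to roughly one copy of the checkpoint before the move to the GPU. The checkpoint name in the usage comment is a placeholder, not taken from this repo.

```python
import torch
from transformers import WhisperModel

def load_whisper(audio_tower_name, device="cuda"):
    # Load the weights directly in fp16 with low_cpu_mem_usage=True, then move
    # the model onto the target device in one step, as the patched line does.
    model = WhisperModel.from_pretrained(
        audio_tower_name, torch_dtype=torch.float16, low_cpu_mem_usage=True).to(device)
    # As in the patched file: clear the forced decoder ids after loading.
    model.config.forced_decoder_ids = None
    return model

# Usage (checkpoint name is a placeholder):
# whisper = load_whisper("openai/whisper-large-v3", device="cuda")
```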
app.py CHANGED

@@ -58,7 +58,7 @@ def process_audio_input(audio):
     return None
 
 @spaces.GPU(duration=180) # use ZeroGPU, 3-minute timeout
-def process_audio_text(audio):
+def process_audio_text(text, audio):
     """Main processing function"""
     global _MODEL_ON_CUDA, inference_model
 
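The `@spaces.GPU` decorator and the `_MODEL_ON_CUDA` global suggest the usual ZeroGPU pattern: the model lives on CPU at startup and is moved to CUDA lazily inside the first GPU-backed call. A minimal hypothetical stub illustrating that pattern (the placeholder model and return value are not from this repo):

```python
import torch
import spaces  # Hugging Face ZeroGPU helper package

# Created at import time, i.e. on CPU: ZeroGPU only attaches a GPU while a
# @spaces.GPU-decorated function is executing.
inference_model = torch.nn.Linear(16, 16)  # placeholder for the real model
_MODEL_ON_CUDA = False

@spaces.GPU(duration=180)  # request a GPU for up to 180 seconds per call
def process_audio_text(text, audio):
    """Main processing function (stub)."""
    global _MODEL_ON_CUDA, inference_model
    if not _MODEL_ON_CUDA:
        # First GPU-backed call: move the model to CUDA once and remember it.
        inference_model = inference_model.to("cuda")
        _MODEL_ON_CUDA = True
    features = torch.zeros(1, 16, device="cuda")
    return f"text={text!r}, audio={audio}: {inference_model(features).sum().item():.3f}"
```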
@@ -123,23 +123,25 @@ init_model()
 
 if __name__ == "__main__":
     examples = [
-        ["./show_case/1.wav"],
-        ["./show_case/2.wav"],
+        ["", "./show_case/1.wav"],
+        ["", "./show_case/2.wav"],
     ]
 
     iface = gr.Interface(
         fn=process_audio_text,
         inputs=[
-
+            gr.Textbox(label="Enter text instruction", value=""),
             gr.Audio(type="filepath", label="Upload Audio")
         ],
         outputs=[
-            gr.
-            gr.
+            gr.Textbox(label="Model output"),
+            gr.Audio(label="Streamed Audio", streaming=True, autoplay=True)
         ],
         examples=examples,
+        title="🔊 EchoX Assistant",
+        description="A multimodal AI assistant that understands speech and responds with both text and audio",
         live=False,
         allow_flagging="never"
     )
 
-    iface.launch(server_name="0.0.0.0", server_port=7860, share=
+    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
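Taken together, the app.py changes add a text-instruction input, a text output plus a streamed audio output, and a title/description. A self-contained sketch of the same interface layout, with a stub generator standing in for the real model (the stub, its sine-wave audio, and its reply text are placeholders):

```python
import numpy as np
import gradio as gr

def process_audio_text(text, audio):
    """Stub generator: each yield updates the textbox and streams another audio chunk."""
    sr = 16000
    t = np.arange(sr // 2) / sr
    chunk = (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
    reply = f"Instruction: {text!r}, audio file: {audio}"
    for i in range(1, 4):
        yield reply[: len(reply) * i // 3], (sr, chunk)

iface = gr.Interface(
    fn=process_audio_text,
    inputs=[
        gr.Textbox(label="Enter text instruction", value=""),
        gr.Audio(type="filepath", label="Upload Audio"),
    ],
    outputs=[
        gr.Textbox(label="Model output"),
        gr.Audio(label="Streamed Audio", streaming=True, autoplay=True),
    ],
    title="🔊 EchoX Assistant",
    description="A multimodal AI assistant that understands speech and responds with both text and audio",
    live=False,
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
```

The `examples` entries from the diff are omitted here because the ./show_case/*.wav files only exist in the Space's repository.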