Spaces:

NexaAI
/

omni-audio-demo

Running

App Files Files Community

PerryCheng614 committed on Dec 17, 2024

Commit

c68be50

1 Parent(s): 81ebbfd

change to http

Browse files

Files changed (1) hide show

app.py +64 -38

app.py CHANGED Viewed

@@ -1,56 +1,77 @@
 import gradio as gr
-import websockets
-import asyncio
 import json
-import base64
-async def process_audio_stream(audio_path, max_tokens):
     """
-    Process audio with streaming response via WebSocket
     """
     if not audio_path:
         yield "Please upload or record an audio file first."
         return
     try:
-        # Read audio file and convert to base64 bytes
-        with open(audio_path, 'rb') as f:
-            audio_bytes = f.read()
-            base64_bytes = base64.b64encode(audio_bytes)
-        # Connect to WebSocket
-        async with websockets.connect('wss://nexa-omni.nexa4ai.com/ws/process-audio/') as websocket:
-            # Send binary base64 audio data as bytes
-            await websocket.send(base64_bytes)  # Send the raw base64 bytes
-            # Send parameters as JSON string
-            await websocket.send(json.dumps({
-                "prompt": "",
-                "max_tokens": max_tokens
-            }))
             # Initialize response
-            response = ""
-            # Receive streaming response
-            async for message in websocket:
-                try:
-                    data = json.loads(message)
-                    if data["status"] == "generating":
-                        response += data["token"]
-                        yield response
-                    elif data["status"] == "complete":
-                        break
-                    elif data["status"] == "error":
-                        yield f"Error: {data['error']}"
-                        break
-                except json.JSONDecodeError:
-                    continue
     except Exception as e:
-        yield f"Error connecting to server: {str(e)}"
-# Create Gradio interface
 demo = gr.Interface(
     fn=process_audio_stream,
     inputs=[
@@ -70,7 +91,6 @@ demo = gr.Interface(
     outputs=gr.Textbox(label="Response", interactive=False),
     title="NEXA OmniAudio-2.6B",
     description=f"""
     OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.
     Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>
@@ -88,4 +108,10 @@ demo = gr.Interface(
 )
 if __name__ == "__main__":
-    demo.queue().launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import requests
 import json
+import os
+API_KEY = os.getenv("API_KEY")
+if not API_KEY:
+    raise ValueError("API_KEY environment variable must be set")
# Tokens the server may emit as chat-template residue before the real answer.
_LEADING_NOISE_TOKENS = (" ", " \n", "\n", "<|im_start|>", "assistant")
# Upper bound on how many leading noise tokens are dropped per response.
_MAX_SKIPPED_TOKENS = 3
# (connect, read) timeouts in seconds; without them a stalled server would
# block this worker forever.
_REQUEST_TIMEOUT = (10, 300)


def _parse_sse_event(raw_line):
    """Return the JSON object carried by one ``data: ...`` line, else None."""
    if not raw_line:
        return None  # blank keep-alive / event separator line
    text = raw_line.decode('utf-8')
    if not text.startswith('data: '):
        return None
    try:
        event = json.loads(text[6:])  # Skip 'data: ' prefix
    except json.JSONDecodeError:
        return None
    return event if isinstance(event, dict) else None


def process_audio_stream(audio_path, max_tokens):
    """
    Process audio with streaming response via HTTP

    Uploads the audio file as multipart form data and reads the reply as a
    stream of ``data: {json}`` lines. This is a generator: each yielded value
    is the full response text accumulated so far, so the caller can simply
    replace the displayed text on every update.

    Args:
        audio_path: Path of the audio file to upload. A falsy value yields a
            prompt asking the user to provide audio and stops.
        max_tokens: Sent to the server unchanged as the ``max_tokens`` field.

    Yields:
        str: The accumulated response text, or a single human-readable
        error message when the request or the server fails.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    try:
        # Read and prepare audio file
        with open(audio_path, 'rb') as audio_file:
            files = {
                'audio_file': ('audio.wav', audio_file, 'audio/wav')
            }
            form_fields = {
                'prompt': "",
                'max_tokens': max_tokens
            }
            headers = {
                'X-API-Key': API_KEY
            }

            # Make streaming request. The context manager guarantees the
            # connection is released even when we stop reading early.
            with requests.post(
                'https://nexa-omni.nexa4ai.com/process-audio/',
                files=files,
                data=form_fields,
                headers=headers,
                stream=True,
                timeout=_REQUEST_TIMEOUT,
            ) as response:
                if response.status_code != 200:
                    yield f"Error: Server returned status code {response.status_code}"
                    return

                response_text = ""
                skipped_tokens = 0

                # Process the streaming response
                for raw_line in response.iter_lines():
                    event = _parse_sse_event(raw_line)
                    if event is None:
                        continue

                    status = event.get("status")
                    if status == "generating":
                        token = event.get("token", "")
                        # Only strip noise that precedes the answer; once real
                        # text has been emitted, whitespace tokens are content.
                        if (
                            not response_text
                            and skipped_tokens < _MAX_SKIPPED_TOKENS
                            and token in _LEADING_NOISE_TOKENS
                        ):
                            skipped_tokens += 1
                            continue
                        response_text += token
                        yield response_text
                    elif status == "complete":
                        break
                    elif status == "error":
                        yield f"Error: {event.get('error', 'unknown error')}"
                        break
    except Exception as e:
        # Top-level boundary for the UI: surface any failure as text instead
        # of crashing the Gradio worker.
        yield f"Error processing request: {str(e)}"
+# Create Gradio interface with specific queue configurations
 demo = gr.Interface(
     fn=process_audio_stream,
     inputs=[
     outputs=gr.Textbox(label="Response", interactive=False),
     title="NEXA OmniAudio-2.6B",
     description=f"""
     OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.
     Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>
 )
 if __name__ == "__main__":
+    # Configure the queue for better streaming performance
+    demo.queue(
+        max_size=20,
+    ).launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+    )