Spaces:

Jaward
/

optimus

Running

App Files Files Community

Jaward committed on Aug 20, 2024

Commit

d99b477

verified ·

1 Parent(s): 3169305

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -57

app.py CHANGED Viewed

@@ -11,8 +11,6 @@ import random
 from openai import OpenAI
 import subprocess
 from starlette.requests import ClientDisconnect
-import logging
-import time
 LLAMA_3B_API_ENDPOINT = os.environ.get("LLAMA_3B_API_ENDPOINT")
 LLAMA_3B_API_KEY = os.environ.get("LLAMA_3B_API_KEY")
@@ -20,8 +18,6 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 default_lang = "en"
 engines = { default_lang: Model(default_lang) }
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 LANGUAGE_CODES = {
     "English": "eng",
@@ -124,76 +120,64 @@ def models(text, model="Llama 3 8B Service", seed=42):
         return output
-async def translate_speech_with_timeout(audio_file, target_language, timeout=30):
-    try:
-        language_code = LANGUAGE_CODES[target_language]
-        output_file = f"translated_audio_{int(time.time())}.wav"
-        command = [
-            "expressivity_predict",
-            audio_file,
-            "--tgt_lang", language_code,
-            "--model_name", "seamless_expressivity",
-            "--vocoder_name", "vocoder_pretssel",
-            "--gated-model-dir", "models",
-            "--output_path", output_file
-        ]
-        process = await asyncio.create_subprocess_exec(
-            *command,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE
-        )
-        try:
-            stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
-        except asyncio.TimeoutError:
-            process.kill()
-            raise Exception("Translation process timed out")
-        if process.returncode != 0:
-            raise Exception(f"Translation process failed: {stderr.decode()}")
-        if os.path.exists(output_file):
-            print(f"File created successfully: {output_file}")
-            return output_file
-        else:
-            raise Exception(f"File not found: {output_file}")
-    except Exception as e:
-        print(f"Translation error: {str(e)}")
         return None
 async def respond(audio, model, seed, target_language):
     try:
         if audio is None:
-            return None, None
         user_input = transcribe(audio)
         if not user_input:
-            return None, None
         if user_input.lower().startswith("please translate"):
-            # Use background task for translation
-            translated_audio = await translate_speech_with_timeout(audio, target_language)
-            return None, translated_audio
         else:
             reply = models(user_input, model, seed)
             communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
             with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
                 tmp_path = tmp_file.name
                 await communicate.save(tmp_path)
-            return tmp_path, None
     except ClientDisconnect:
         print("Client disconnected")
-        return None, None
     except Exception as e:
         print(f"An error occurred: {str(e)}")
-        return None, None
 def clear_history():
     global conversation_history
     conversation_history = []
-    return None, None
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# <center><b>Optimus Prime: Voice Assistant with Translation</b></center>")
@@ -201,7 +185,6 @@ with gr.Blocks(css="style.css") as demo:
     with gr.Row():
         with gr.Column(scale=1):
-            input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
             select = gr.Dropdown([
                 'Llama 3 8B Service',
                 'Mixtral 8x7B',
@@ -212,11 +195,6 @@ with gr.Blocks(css="style.css") as demo:
             value="Llama 3 8B Service",
             label="Model"
             )
-            target_lang = gr.Dropdown(
-                choices=list(LANGUAGE_CODES.keys()),
-                value="German",
-                label="Target Language for Translation"
-            )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -225,19 +203,26 @@ with gr.Blocks(css="style.css") as demo:
                 value=0,
                 visible=False
             )
             clear_button = gr.Button("Clear Conversation History")
         with gr.Column(scale=1):
             output_audio = gr.Audio(label="AI Response", type="filepath", interactive=False, autoplay=True)
             translated_audio = gr.Audio(label="Translated Audio", type="filepath", interactive=False, autoplay=True)
     input_audio.change(
         fn=respond,
         inputs=[input_audio, select, seed, target_lang],
-        outputs=[output_audio, translated_audio],
     )
-    clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio])
 if __name__ == "__main__":
-    demo.queue(concurrency_count=5, max_size=20).launch()

 from openai import OpenAI
 import subprocess
 from starlette.requests import ClientDisconnect
 LLAMA_3B_API_ENDPOINT = os.environ.get("LLAMA_3B_API_ENDPOINT")
 LLAMA_3B_API_KEY = os.environ.get("LLAMA_3B_API_KEY")
 default_lang = "en"
 engines = { default_lang: Model(default_lang) }
 LANGUAGE_CODES = {
     "English": "eng",
         return output
def translate_speech(audio_file, target_language, timeout=None):
    """Translate recorded speech by running the ``expressivity_predict`` CLI.

    Args:
        audio_file: Path of the input audio file, or ``None`` when nothing
            was recorded.
        target_language: Key of ``LANGUAGE_CODES`` selecting the target
            language code passed to the tool as ``--tgt_lang``.
        timeout: Optional limit in seconds forwarded to ``subprocess.run``.
            ``None`` (the default) waits for the tool indefinitely.

    Returns:
        Path of the generated WAV file, or ``None`` when there was no input
        or the tool finished without creating the output file.

    Raises:
        KeyError: ``target_language`` is not a key of ``LANGUAGE_CODES``.
        subprocess.CalledProcessError: The tool exited with a non-zero status.
        subprocess.TimeoutExpired: ``timeout`` elapsed before the tool ended.
    """
    if audio_file is None:
        return None
    language_code = LANGUAGE_CODES[target_language]
    # Write into a fresh directory per call: a fixed file name in the working
    # directory would be overwritten by concurrent requests, and a leftover
    # file from an earlier run could be mistaken for this run's output.
    output_dir = tempfile.mkdtemp(prefix="translated_audio_")
    output_file = os.path.join(output_dir, "translated_audio.wav")
    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "models",
        "--output_path", output_file,
    ]
    # check=True turns a failing tool run into CalledProcessError.
    subprocess.run(command, check=True, timeout=timeout)
    if os.path.exists(output_file):
        print(f"File created successfully: {output_file}")
        return output_file
    print(f"File not found: {output_file}")
    return None
 async def respond(audio, model, seed, target_language):
     """Handle one recorded utterance: translate it or answer it by voice.

     The recording is transcribed first. If the transcript starts with
     "please translate" (case-insensitive), the original audio is passed to
     ``translate_speech``; otherwise the transcript is sent to ``models`` and
     the reply is synthesized with edge-tts.

     Args:
         audio: Path of the recorded audio, or ``None`` if nothing was recorded.
         model: Model name forwarded to ``models``.
         seed: Seed forwarded to ``models``.
         target_language: Language name forwarded to ``translate_speech``.

     Returns:
         A 3-tuple ``(response_audio_path, translated_audio_path, status)``.
         At most one of the two paths is set; both are ``None`` on empty
         input, failed transcription, failed translation, or any error.
     """
     # Local import so this block does not depend on a module-level import.
     import asyncio

     try:
         if audio is None:
             return None, None, "No input detected."
         user_input = transcribe(audio)
         if not user_input:
             return None, None, "Could not transcribe audio."
         if user_input.lower().startswith("please translate"):
             # translate_speech blocks on an external process; run it in a
             # worker thread so the event loop keeps serving other requests.
             loop = asyncio.get_running_loop()
             translated_audio = await loop.run_in_executor(
                 None, translate_speech, audio, target_language
             )
             if translated_audio is None:
                 # Do not report success when no output file was produced.
                 return None, None, "Translation failed: no audio was produced."
             return None, translated_audio, f"Translated to {target_language}"
         reply = models(user_input, model, seed)
         communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
         # Reserve a unique path, then close the handle before edge-tts
         # writes to it; delete=False keeps the file for the caller.
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             tmp_path = tmp_file.name
         await communicate.save(tmp_path)
         return tmp_path, None, "Voice assistant response"
     except ClientDisconnect:
         print("Client disconnected")
         return None, None, "Client disconnected. Please try again."
     except Exception as e:
         # Top-level handler boundary: report the error in the status field
         # instead of propagating it.
         print(f"An error occurred: {str(e)}")
         return None, None, f"An error occurred: {str(e)}"
 def clear_history():
     global conversation_history
     conversation_history = []
+    return None, None, "Conversation history cleared."
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# <center><b>Optimus Prime: Voice Assistant with Translation</b></center>")
     with gr.Row():
         with gr.Column(scale=1):
             select = gr.Dropdown([
                 'Llama 3 8B Service',
                 'Mixtral 8x7B',
             value="Llama 3 8B Service",
             label="Model"
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
                 value=0,
                 visible=False
             )
+            target_lang = gr.Dropdown(
+                choices=list(LANGUAGE_CODES.keys()),
+                value="German",
+                label="Target Language for Translation"
+            )
+            input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
             clear_button = gr.Button("Clear Conversation History")
         with gr.Column(scale=1):
             output_audio = gr.Audio(label="AI Response", type="filepath", interactive=False, autoplay=True)
             translated_audio = gr.Audio(label="Translated Audio", type="filepath", interactive=False, autoplay=True)
+            status_message = gr.Textbox(label="Status", interactive=False)
     input_audio.change(
         fn=respond,
         inputs=[input_audio, select, seed, target_lang],
+        outputs=[output_audio, translated_audio, status_message],
     )
+    clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio, status_message])
 if __name__ == "__main__":
+    demo.queue(max_size=200).launch()