Commit b46da1b · 1 parent: f65990c

change in main.py to explicitly tell OpenAI what the audio format is
main.py CHANGED
@@ -101,15 +101,18 @@ You are an expert AI assistant for a premier real estate developer.
 """
 
 
-# ---
-def transcribe_audio(audio_bytes: bytes) -> str:
+# --- FIXED: transcribe_audio accepts path + bytes ---
+def transcribe_audio(audio_path: str, audio_bytes: bytes) -> str:
     for attempt in range(3):
         try:
             audio_file = io.BytesIO(audio_bytes)
-
+            filename = os.path.basename(audio_path)  # e.g., "audio.wav"
+
+            logging.info(f"Transcribing audio: {filename} ({len(audio_bytes)} bytes)")
+
             transcript = client_openai.audio.transcriptions.create(
                 model="whisper-1",
-                file=audio_file
+                file=(filename, audio_file)  # ← Critical: gives format hint
             )
             text = transcript.text.strip()
 
@@ -122,13 +125,16 @@ def transcribe_audio(audio_bytes: bytes) -> str:
             )
             text = response.choices[0].message.content.strip()
 
+            logging.info(f"Transcribed: {text}")
             return text
+
         except Exception as e:
             logging.error(f"Transcription error (attempt {attempt+1}): {e}")
             if attempt == 2:
                 return ""
     return ""
 
+
 def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
     for attempt in range(3):
         try:
@@ -214,23 +220,23 @@ async def test_text_query_endpoint(query: TextQuery):
     return {"response": response}
 
 
-# ---
+# --- FIXED: process_audio passes path + bytes ---
 async def process_audio(audio_path):
-    if not audio_path:
-        return None, "No audio"
+    if not audio_path or not os.path.exists(audio_path):
+        return None, "No valid audio file received."
 
     try:
-        #
+        # Read raw bytes
         with open(audio_path, "rb") as f:
             audio_bytes = f.read()
 
         if len(audio_bytes) == 0:
-            return None, "Empty audio."
+            return None, "Empty audio file."
 
-        # 1. Transcribe
-        user_text = await run_in_threadpool(transcribe_audio, audio_bytes)
+        # 1. Transcribe — pass path + bytes
+        user_text = await run_in_threadpool(transcribe_audio, audio_path, audio_bytes)
         if not user_text:
-            return None, "Couldn't understand. Try again."
+            return None, "Couldn't understand audio. Try again."
 
         logging.info(f"User: {user_text}")
 
@@ -271,16 +277,14 @@ with gr.Blocks(title="Real Estate AI") as demo:
 
     out_text = gr.Textbox(label="Conversation", lines=8)
 
+    # Only trigger on real file (not example text)
     inp.change(process_audio, inp, [out_audio, out_text])
 
-    gr.Examples(
-        examples=[
-            ...
-            ...
-            ...
-        ],
-        inputs=inp
-    )
+    # --- FIXED: Examples now use real audio files (optional) ---
+    # Remove text examples to avoid FileNotFoundError
+    # Or: Record real .wav files and upload to repo
+    # For now: disable examples
+    # gr.Examples(examples=[], inputs=inp)
 
 
 # --- MOUNT GRADIO ---
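Why the tuple matters: the OpenAI SDK sends the upload with whatever filename the file object carries, and a bare io.BytesIO has none, so Whisper has no extension from which to infer the container format and can reject the request as an invalid file. A minimal sketch of the failing and fixed calls, assuming OPENAI_API_KEY is set in the environment and "sample.wav" is a placeholder clip:

import io

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

with open("sample.wav", "rb") as f:
    audio_bytes = f.read()

buffer = io.BytesIO(audio_bytes)

# Bare buffer: no filename travels with the upload, so the API may reject
# it with an "invalid file format" style error.
# client.audio.transcriptions.create(model="whisper-1", file=buffer)

# Tuple form: the first element is only a filename hint, not a path that
# has to exist anywhere.
transcript = client.audio.transcriptions.create(
    model="whisper-1",
    file=("sample.wav", buffer),
)
print(transcript.text)

Setting buffer.name = "sample.wav" before the call should achieve the same thing, since the SDK picks the filename up from the file object's name attribute when one is present.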
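A quick way to exercise the new logging and the three-attempt retry loop locally, assuming transcribe_audio is importable from main.py and a short test.wav sits next to the script (both assumptions):

import logging

from main import transcribe_audio  # assumes main.py is on the path

logging.basicConfig(level=logging.INFO)

with open("test.wav", "rb") as f:
    audio_bytes = f.read()

# Both arguments are now required: the path supplies the filename hint,
# the bytes supply the payload.
text = transcribe_audio("test.wav", audio_bytes)
print(text or "empty result: all 3 attempts failed")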
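process_audio is async, but transcribe_audio makes a blocking HTTP call; running it inline would stall the event loop for every other request. run_in_threadpool (from starlette.concurrency, re-exported by FastAPI) hands the call to a worker thread and lets the coroutine await the result. A self-contained sketch of the pattern, with a sleep standing in for the OpenAI request:

import asyncio
import time

from starlette.concurrency import run_in_threadpool


def blocking_transcribe(audio_bytes: bytes) -> str:
    time.sleep(1)  # stands in for the blocking OpenAI request
    return f"transcribed {len(audio_bytes)} bytes"


async def handler() -> None:
    # The event loop stays free while the worker thread blocks.
    text = await run_in_threadpool(blocking_transcribe, b"\x00" * 1024)
    print(text)


asyncio.run(handler())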
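If examples are wanted later, the "record real .wav files and upload to repo" option from the comments would look roughly like the sketch below. The file paths, labels, and component settings are assumptions (type="filepath" matches process_audio receiving a path), and cache_examples=False keeps Gradio from running the handler over every example at startup:

import gradio as gr


async def process_audio(audio_path):  # stand-in for the real handler
    return None, f"got: {audio_path}"


with gr.Blocks(title="Real Estate AI") as demo:
    inp = gr.Audio(type="filepath", label="Your question")
    out_audio = gr.Audio(label="Response")
    out_text = gr.Textbox(label="Conversation", lines=8)

    inp.change(process_audio, inp, [out_audio, out_text])

    # Real committed files, so gr.Audio never receives plain example text:
    gr.Examples(
        examples=["examples/pricing_question.wav", "examples/tour_request.wav"],
        inputs=inp,
        cache_examples=False,
    )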