Spaces:

Arnavkumar01
/

browser_calling_Agent

Running

App Files Community

Arnavkumar01 committed 29 days ago

Commit

e141e7c

1 Parent(s): 2f2f8a0

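Add detailed ElevenLabs client initialization and library-version logging, guard TTS generation against an uninitialized client, handle streamed audio responses, return the text response when audio generation fails, and condense the answer system prompt.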

Browse files

Files changed (1) hide show

main.py +71 -20

main.py CHANGED Viewed

@@ -50,7 +50,30 @@ TABLE_DESCRIPTIONS = """
 embeddings = None
 vector_store = None
 client_openai = OpenAI(api_key=OPENAI_API_KEY)
-client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
 # --- LIFESPAN ---
@@ -71,6 +94,13 @@ async def lifespan(app: FastAPI):
     yield
     logging.info("Shutting down.")
 app = FastAPI(lifespan=lifespan)
@@ -90,16 +120,14 @@ You are a query analysis agent. Transform the user's query into a precise search
 ANSWER_SYSTEM_PROMPT = """
 You are an expert AI assistant for a premier real estate developer.
-## YOUR PERSONA
-- You are professional, helpful, and highly knowledgeable. Your tone should be polite and articulate.
-## CORE BUSINESS KNOWLEDGE
-- **Operational Cities:** We are currently operational in Pune, Mumbai, Bengaluru, Delhi, Chennai, Hyderabad, Goa, Gurgaon, Kolkata.
-- **Property Types:** We offer luxury apartments, villas, and commercial properties.
-- **Budget Range:** Our residential properties typically range from 45 lakhs to 5 crores.
-## CORE RULES
-1.  **Language Adaptation:** If the user's original query was in Hinglish, respond in Hinglish. If in English, respond in English.
-2.  **Fact-Based Answers:** Use the provided CONTEXT to answer the user's question. If the context is empty, use your Core Business Knowledge.
-3.  **Stay on Topic:** Only answer questions related to real estate.
 """
@@ -136,28 +164,48 @@ def transcribe_audio(audio_path: str, audio_bytes: bytes) -> str:
                 return ""
     return ""
 def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
     for attempt in range(3):
         try:
-            return client_elevenlabs.generate(
                 text=text,
                 voice=voice,
                 model="eleven_multilingual_v2",
                 output_format="mp3_44100_128"
             )
         except Exception as e:
-            logging.error(f"ElevenLabs error (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
             if attempt == 2:
                 return b''
     return b''
-# --- UPDATED formulate_search_plan with logging ---
 async def formulate_search_plan(user_query: str) -> dict:
     logging.info(f"Formulating search plan for query: {user_query}")
     for attempt in range(3):
         try:
-            # --- FIX: Format the prompt here with BOTH variables ---
             formatted_prompt = QUERY_FORMULATION_PROMPT.format(
                 table_descriptions=TABLE_DESCRIPTIONS,
                 user_query=user_query
@@ -170,7 +218,6 @@ async def formulate_search_plan(user_query: str) -> dict:
                 response_format={"type": "json_object"},
                 temperature=0.0
             )
-            # ... rest of the function ...
             # Log the raw response BEFORE trying to parse
             raw_response_content = response.choices[0].message.content
             logging.info(f"Raw Planner LLM response content: {raw_response_content}")
@@ -188,7 +235,6 @@ async def formulate_search_plan(user_query: str) -> dict:
     # Fallback if loop finishes unexpectedly
     logging.error("Planner loop finished unexpectedly. Using fallback.")
     return {"search_query": user_query, "filter_table": None}
-# --- END UPDATED FUNCTION ---
 async def get_agent_response(user_text: str) -> str:
     for attempt in range(3):
@@ -273,12 +319,17 @@ async def process_audio(audio_path):
             generate_elevenlabs_sync, agent_response, ELEVENLABS_VOICE_NAME
         )
         if not ai_audio_bytes:
-            return None, "Failed to generate voice."
         # Save to temp file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
             f.write(ai_audio_bytes)
             out_path = f.name
         return out_path, f"**You:** {user_text}\n\n**AI:** {agent_response}"
@@ -295,7 +346,7 @@ with gr.Blocks(title="Real Estate AI") as demo:
     with gr.Row():
         inp = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
         out_audio = gr.Audio(label="AI Response", type="filepath")
     out_text = gr.Textbox(label="Conversation", lines=8)
     inp.change(process_audio, inp, [out_audio, out_text])

 embeddings = None
 vector_store = None
 client_openai = OpenAI(api_key=OPENAI_API_KEY)
+client_elevenlabs = None # Initialize as None first
+# --- ADDED: DETAILED ELEVENLABS INITIALIZATION LOGGING ---
+try:
+    # Log the key (partially) to verify it's being read
+    key_preview = ELEVENLABS_API_KEY[:5] + "..." + ELEVENLABS_API_KEY[-4:] if ELEVENLABS_API_KEY and len(ELEVENLABS_API_KEY) > 9 else "None or too short"
+    logging.info(f"Attempting to initialize ElevenLabs client with key: {key_preview}")
+    # Ensure key is not None or empty before initializing
+    if not ELEVENLABS_API_KEY:
+        raise ValueError("ELEVENLABS_API_KEY environment variable not set or empty.")
+    client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
+    logging.info(f"Initialized ElevenLabs client object. Type: {type(client_elevenlabs)}")
+    # Try accessing a simple attribute or method to confirm initialization
+    # Note: This might make a network call during startup
+    voices = client_elevenlabs.voices.get_all()
+    logging.info(f"Successfully fetched {len(voices.voices)} voices from ElevenLabs.")
+except Exception as e:
+    logging.error(f"Failed to initialize ElevenLabs client or fetch voices: {e}", exc_info=True)
+    client_elevenlabs = None # Ensure it's None if init failed
+# --- END ADDED LOGGING ---
 # --- LIFESPAN ---
     yield
     logging.info("Shutting down.")
+# --- ADDED: LIBRARY VERSION LOGGING ---
+try:
+    import elevenlabs
+    logging.info(f"Found elevenlabs library version: {elevenlabs.__version__}")
+except ImportError:
+    logging.error("Could not import elevenlabs library!")
+# --- END ADDED LOGGING ---
 app = FastAPI(lifespan=lifespan)
 ANSWER_SYSTEM_PROMPT = """
 You are an expert AI assistant for a premier real estate developer.
+## CORE KNOWLEDGE
+- Cities: Pune, Mumbai, Bengaluru, Delhi, Chennai, Hyderabad, Goa, Gurgaon, Kolkata.
+- Properties: Luxury apartments, villas, commercial.
+- Budget: 45 lakhs to 5 crores.
+## RULES
+1. Match user language (Hinglish → Hinglish, English → English).
+2. Use CONTEXT if available, else use core knowledge.
+3. Only answer real estate questions.
 """
                 return ""
     return ""
+# --- UPDATED generate_elevenlabs_sync with check ---
 def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
+    # --- ADDED THIS CHECK ---
+    if client_elevenlabs is None:
+        logging.error("ElevenLabs client is not initialized. Cannot generate audio.")
+        return b''
+    # --- END ADDED CHECK ---
     for attempt in range(3):
         try:
+            # This call might still fail if init succeeded but key is bad at runtime
+            logging.info(f"Calling ElevenLabs generate for voice '{voice}'...")
+            audio_data = client_elevenlabs.generate(
                 text=text,
                 voice=voice,
                 model="eleven_multilingual_v2",
                 output_format="mp3_44100_128"
             )
+            # Check if generate returns bytes directly or needs iteration (depends on exact version/method)
+            if isinstance(audio_data, bytes):
+                 logging.info(f"ElevenLabs generate returned {len(audio_data)} bytes.")
+                 return audio_data
+            else:
+                 # Handle streaming iterator if necessary
+                 chunks = b""
+                 for chunk in audio_data:
+                     chunks += chunk
+                 logging.info(f"ElevenLabs generate streamed {len(chunks)} bytes.")
+                 return chunks
         except Exception as e:
+            logging.error(f"ElevenLabs error during generate (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
             if attempt == 2:
                 return b''
     return b''
+# --- END UPDATED FUNCTION ---
 async def formulate_search_plan(user_query: str) -> dict:
     logging.info(f"Formulating search plan for query: {user_query}")
     for attempt in range(3):
         try:
+            # Format the prompt here with BOTH variables
             formatted_prompt = QUERY_FORMULATION_PROMPT.format(
                 table_descriptions=TABLE_DESCRIPTIONS,
                 user_query=user_query
                 response_format={"type": "json_object"},
                 temperature=0.0
             )
             # Log the raw response BEFORE trying to parse
             raw_response_content = response.choices[0].message.content
             logging.info(f"Raw Planner LLM response content: {raw_response_content}")
     # Fallback if loop finishes unexpectedly
     logging.error("Planner loop finished unexpectedly. Using fallback.")
     return {"search_query": user_query, "filter_table": None}
 async def get_agent_response(user_text: str) -> str:
     for attempt in range(3):
             generate_elevenlabs_sync, agent_response, ELEVENLABS_VOICE_NAME
         )
         if not ai_audio_bytes:
+            # Return the text response even if TTS fails
+            logging.error("Failed to generate voice. Returning text only.")
+            return None, f"**You:** {user_text}\n\n**AI:** {agent_response}\n\n_(Audio generation failed)_"
         # Save to temp file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
             f.write(ai_audio_bytes)
             out_path = f.name
+            logging.info(f"Saved generated audio to temp file: {out_path}")
         return out_path, f"**You:** {user_text}\n\n**AI:** {agent_response}"
     with gr.Row():
         inp = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
         out_audio = gr.Audio(label="AI Response", type="filepath")
     out_text = gr.Textbox(label="Conversation", lines=8)
     inp.change(process_audio, inp, [out_audio, out_text])