Arnavkumar01 committed on
Commit
1a15b05
·
1 Parent(s): e141e7c

I wish I could `rm -rf` my way through this, but, well, changes were made to the voice conversion code for ElevenLabs

Browse files
Files changed (1) hide show
  1. main.py +140 -131
main.py CHANGED
@@ -3,8 +3,6 @@ import io
3
  import json
4
  import re
5
  import tempfile
6
- import asyncio
7
- from typing import Optional
8
  import logging
9
  from contextlib import asynccontextmanager
10
  from fastapi import FastAPI, Request, status, Depends, Header, HTTPException
@@ -20,10 +18,15 @@ from sqlalchemy import create_engine
20
  # --- GRADIO ---
21
  import gradio as gr
22
 
23
- # --- SETUP ---
24
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
25
- logging.getLogger('tensorflow').setLevel(logging.ERROR)
26
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
27
 
28
  load_dotenv()
29
  NEON_DATABASE_URL = os.getenv("NEON_DATABASE_URL")
@@ -31,12 +34,15 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
31
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
32
  SHARED_SECRET = os.getenv("SHARED_SECRET")
33
 
34
- # --- CONFIG ---
35
  COLLECTION_NAME = "real_estate_embeddings"
36
  EMBEDDING_MODEL = "hkunlp/instructor-large"
37
- ELEVENLABS_VOICE_NAME = "Leo"
 
 
 
38
  PLANNER_MODEL = "gpt-4o-mini"
39
  ANSWERER_MODEL = "gpt-4o"
 
40
  TABLE_DESCRIPTIONS = """
41
  - "ongoing_projects_source": Details about projects currently under construction.
42
  - "upcoming_projects_source": Information on future planned projects.
@@ -46,37 +52,48 @@ TABLE_DESCRIPTIONS = """
46
  - "feedback_source": Customer feedback and ratings for projects.
47
  """
48
 
49
- # --- CLIENTS ---
 
 
50
  embeddings = None
51
  vector_store = None
52
  client_openai = OpenAI(api_key=OPENAI_API_KEY)
53
- client_elevenlabs = None # Initialize as None first
54
 
55
- # --- ADDED: DETAILED ELEVENLABS INITIALIZATION LOGGING ---
56
  try:
57
- # Log the key (partially) to verify it's being read
58
- key_preview = ELEVENLABS_API_KEY[:5] + "..." + ELEVENLABS_API_KEY[-4:] if ELEVENLABS_API_KEY and len(ELEVENLABS_API_KEY) > 9 else "None or too short"
59
- logging.info(f"Attempting to initialize ElevenLabs client with key: {key_preview}")
 
 
 
60
 
61
- # Ensure key is not None or empty before initializing
62
  if not ELEVENLABS_API_KEY:
63
- raise ValueError("ELEVENLABS_API_KEY environment variable not set or empty.")
64
 
65
  client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
66
- logging.info(f"Initialized ElevenLabs client object. Type: {type(client_elevenlabs)}")
67
 
68
- # Try accessing a simple attribute or method to confirm initialization
69
- # Note: This might make a network call during startup
70
  voices = client_elevenlabs.voices.get_all()
71
- logging.info(f"Successfully fetched {len(voices.voices)} voices from ElevenLabs.")
72
 
73
  except Exception as e:
74
- logging.error(f"Failed to initialize ElevenLabs client or fetch voices: {e}", exc_info=True)
75
- client_elevenlabs = None # Ensure it's None if init failed
76
- # --- END ADDED LOGGING ---
 
 
 
77
 
 
 
 
78
 
79
- # --- LIFESPAN ---
 
 
80
  @asynccontextmanager
81
  async def lifespan(app: FastAPI):
82
  global embeddings, vector_store
@@ -94,18 +111,12 @@ async def lifespan(app: FastAPI):
94
  yield
95
  logging.info("Shutting down.")
96
 
97
- # --- ADDED: LIBRARY VERSION LOGGING ---
98
- try:
99
- import elevenlabs
100
- logging.info(f"Found elevenlabs library version: {elevenlabs.__version__}")
101
- except ImportError:
102
- logging.error("Could not import elevenlabs library!")
103
- # --- END ADDED LOGGING ---
104
 
105
  app = FastAPI(lifespan=lifespan)
106
 
107
-
108
- # --- PROMPTS ---
 
109
  QUERY_FORMULATION_PROMPT = """
110
  You are a query analysis agent. Transform the user's query into a precise search query and determine the correct table to filter by.
111
  **Available Tables:**
@@ -116,7 +127,7 @@ You are a query analysis agent. Transform the user's query into a precise search
116
  2. If status keywords (ongoing, completed, upcoming, etc.) are present, pick the matching table.
117
  3. If no status keyword, set filter_table to null.
118
  4. Return JSON: {{"search_query": "...", "filter_table": "table_name or null"}}
119
- """
120
 
121
  ANSWER_SYSTEM_PROMPT = """
122
  You are an expert AI assistant for a premier real estate developer.
@@ -128,158 +139,154 @@ You are an expert AI assistant for a premier real estate developer.
128
  1. Match user language (Hinglish β†’ Hinglish, English β†’ English).
129
  2. Use CONTEXT if available, else use core knowledge.
130
  3. Only answer real estate questions.
131
- """
132
-
133
 
134
- # --- AUDIO & LLM HELPERS ---
 
 
135
  def transcribe_audio(audio_path: str, audio_bytes: bytes) -> str:
136
  for attempt in range(3):
137
  try:
138
  audio_file = io.BytesIO(audio_bytes)
139
- filename = os.path.basename(audio_path) # e.g., "audio.wav"
140
-
141
- logging.info(f"Transcribing audio: {filename} ({len(audio_bytes)} bytes)")
142
 
 
143
  transcript = client_openai.audio.transcriptions.create(
144
  model="whisper-1",
145
- file=(filename, audio_file) # ← Critical: gives format hint
146
  )
147
  text = transcript.text.strip()
148
 
149
  # Hinglish transliteration
150
- if re.search(r'[\u0900-\u097F]', text):
151
- response = client_openai.chat.completions.create(
152
  model="gpt-4o-mini",
153
- messages=[{"role": "user", "content": f"Transliterate to Roman (Hinglish): {text}"}],
154
- temperature=0.0
 
 
155
  )
156
- text = response.choices[0].message.content.strip()
157
 
158
  logging.info(f"Transcribed: {text}")
159
  return text
160
-
161
  except Exception as e:
162
- logging.error(f"Transcription error (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
163
  if attempt == 2:
164
  return ""
165
  return ""
166
 
167
- # --- UPDATED generate_elevenlabs_sync with check ---
168
- def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
169
- # --- ADDED THIS CHECK ---
 
 
 
170
  if client_elevenlabs is None:
171
- logging.error("ElevenLabs client is not initialized. Cannot generate audio.")
172
- return b''
173
- # --- END ADDED CHECK ---
174
 
175
  for attempt in range(3):
176
  try:
177
- # This call might still fail if init succeeded but key is bad at runtime
178
- logging.info(f"Calling ElevenLabs generate for voice '{voice}'...")
179
- audio_data = client_elevenlabs.generate(
180
  text=text,
181
- voice=voice,
182
  model="eleven_multilingual_v2",
183
- output_format="mp3_44100_128"
184
  )
185
- # Check if generate returns bytes directly or needs iteration (depends on exact version/method)
186
- if isinstance(audio_data, bytes):
187
- logging.info(f"ElevenLabs generate returned {len(audio_data)} bytes.")
188
- return audio_data
189
- else:
190
- # Handle streaming iterator if necessary
191
- chunks = b""
192
- for chunk in audio_data:
193
- chunks += chunk
194
- logging.info(f"ElevenLabs generate streamed {len(chunks)} bytes.")
195
- return chunks
196
-
197
  except Exception as e:
198
- logging.error(f"ElevenLabs error during generate (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
 
 
199
  if attempt == 2:
200
- return b''
201
- return b''
202
- # --- END UPDATED FUNCTION ---
203
 
204
  async def formulate_search_plan(user_query: str) -> dict:
205
- logging.info(f"Formulating search plan for query: {user_query}")
206
  for attempt in range(3):
207
  try:
208
- # Format the prompt here with BOTH variables
209
- formatted_prompt = QUERY_FORMULATION_PROMPT.format(
210
- table_descriptions=TABLE_DESCRIPTIONS,
211
- user_query=user_query
212
  )
213
-
214
- response = await run_in_threadpool(
215
  client_openai.chat.completions.create,
216
  model=PLANNER_MODEL,
217
- messages=[{"role": "user", "content": formatted_prompt}], # Use the fully formatted prompt
218
  response_format={"type": "json_object"},
219
- temperature=0.0
220
  )
221
- # Log the raw response BEFORE trying to parse
222
- raw_response_content = response.choices[0].message.content
223
- logging.info(f"Raw Planner LLM response content: {raw_response_content}")
224
-
225
- # Try parsing
226
- plan = json.loads(raw_response_content)
227
- logging.info(f"Successfully parsed search plan: {plan}")
228
  return plan
229
  except Exception as e:
230
- # Log the specific error during parsing or API call, with traceback
231
- logging.error(f"Planner error (attempt {attempt+1}): {e}", exc_info=True)
232
  if attempt == 2:
233
- logging.warning("Planner failed after 3 attempts. Using fallback.")
234
  return {"search_query": user_query, "filter_table": None}
235
- # Fallback if loop finishes unexpectedly
236
- logging.error("Planner loop finished unexpectedly. Using fallback.")
237
  return {"search_query": user_query, "filter_table": None}
238
 
 
239
  async def get_agent_response(user_text: str) -> str:
240
  for attempt in range(3):
241
  try:
242
  plan = await formulate_search_plan(user_text)
243
- search_query = plan.get("search_query", user_text)
244
- filter_table = plan.get("filter_table")
245
- search_filter = {"source_table": filter_table} if filter_table else {}
246
 
247
  docs = await run_in_threadpool(
248
  vector_store.similarity_search,
249
- search_query, k=3, filter=search_filter
 
 
250
  )
251
  if not docs:
252
- docs = await run_in_threadpool(vector_store.similarity_search, search_query, k=3)
253
 
254
- context = "\n\n".join([d.page_content for d in docs])
255
 
256
- response = await run_in_threadpool(
257
  client_openai.chat.completions.create,
258
  model=ANSWERER_MODEL,
259
  messages=[
260
  {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
261
  {"role": "system", "content": f"CONTEXT:\n{context}"},
262
- {"role": "user", "content": f"Question: {user_text}"}
263
- ]
264
  )
265
- return response.choices[0].message.content.strip()
266
  except Exception as e:
267
- logging.error(f"RAG error (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
268
  if attempt == 2:
269
  return "Sorry, I couldn't respond. Please try again."
270
  return "Sorry, I couldn't respond."
271
 
272
 
273
- # --- AUTH ENDPOINT ---
 
 
274
  class TextQuery(BaseModel):
275
  query: str
276
 
 
277
  async def verify_token(x_auth_token: str = Header(...)):
278
  if not SHARED_SECRET or x_auth_token != SHARED_SECRET:
279
  logging.warning("Auth failed for /test-text-query")
280
  raise HTTPException(status_code=401, detail="Invalid token")
281
  logging.info("Auth passed")
282
 
 
283
  @app.post("/test-text-query", dependencies=[Depends(verify_token)])
284
  async def test_text_query_endpoint(query: TextQuery):
285
  logging.info(f"Text query: {query.query}")
@@ -287,58 +294,59 @@ async def test_text_query_endpoint(query: TextQuery):
287
  return {"response": response}
288
 
289
 
290
- # --- GRADIO AUDIO PROCESSING ---
 
 
291
  async def process_audio(audio_path):
292
  if not audio_path or not os.path.exists(audio_path):
293
  return None, "No valid audio file received."
294
 
295
  try:
296
- # Read raw bytes
297
  with open(audio_path, "rb") as f:
298
  audio_bytes = f.read()
299
-
300
- if len(audio_bytes) == 0:
301
  return None, "Empty audio file."
302
 
303
- # 1. Transcribe — pass path + bytes
304
  user_text = await run_in_threadpool(transcribe_audio, audio_path, audio_bytes)
305
  if not user_text:
306
  return None, "Couldn't understand audio. Try again."
307
 
308
  logging.info(f"User: {user_text}")
309
 
310
- # 2. AI Response
311
  agent_response = await get_agent_response(user_text)
312
  if not agent_response:
313
  return None, "No response generated."
314
 
315
  logging.info(f"AI: {agent_response[:100]}...")
316
 
317
- # 3. Generate Speech
318
- ai_audio_bytes = await run_in_threadpool(
319
- generate_elevenlabs_sync, agent_response, ELEVENLABS_VOICE_NAME
320
- )
321
  if not ai_audio_bytes:
322
- # Return the text response even if TTS fails
323
- logging.error("Failed to generate voice. Returning text only.")
324
- return None, f"**You:** {user_text}\n\n**AI:** {agent_response}\n\n_(Audio generation failed)_"
325
-
 
326
 
327
- # Save to temp file
328
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
329
  f.write(ai_audio_bytes)
330
  out_path = f.name
331
- logging.info(f"Saved generated audio to temp file: {out_path}")
332
-
333
 
334
  return out_path, f"**You:** {user_text}\n\n**AI:** {agent_response}"
335
 
336
  except Exception as e:
337
- logging.error(f"Audio processing error: {e}", exc_info=True) # Added exc_info
338
  return None, f"Error: {str(e)}"
339
 
340
 
341
- # --- GRADIO UI ---
 
 
342
  with gr.Blocks(title="Real Estate AI") as demo:
343
  gr.Markdown("# Real Estate Voice Assistant")
344
  gr.Markdown("Ask about projects in Pune, Mumbai, Bengaluru, etc.")
@@ -349,11 +357,12 @@ with gr.Blocks(title="Real Estate AI") as demo:
349
 
350
  out_text = gr.Textbox(label="Conversation", lines=8)
351
 
352
- inp.change(process_audio, inp, [out_audio, out_text])
353
 
354
- # Removed examples to avoid FileNotFoundError with text inputs
355
- # gr.Examples(examples=[], inputs=inp)
356
 
357
 
358
- # --- MOUNT GRADIO ---
 
 
359
  app = gr.mount_gradio_app(app, demo, path="/")
 
3
  import json
4
  import re
5
  import tempfile
 
 
6
  import logging
7
  from contextlib import asynccontextmanager
8
  from fastapi import FastAPI, Request, status, Depends, Header, HTTPException
 
18
  # --- GRADIO ---
19
  import gradio as gr
20
 
21
+ # --------------------------------------------------------------------------- #
22
+ # CONFIGURATION
23
+ # --------------------------------------------------------------------------- #
24
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
25
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format="%(asctime)s - %(levelname)s - %(message)s",
29
+ )
30
 
31
  load_dotenv()
32
  NEON_DATABASE_URL = os.getenv("NEON_DATABASE_URL")
 
34
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
35
  SHARED_SECRET = os.getenv("SHARED_SECRET")
36
 
 
37
  COLLECTION_NAME = "real_estate_embeddings"
38
  EMBEDDING_MODEL = "hkunlp/instructor-large"
39
+
40
+ # *** HARD-CODED VOICE ID (as requested) ***
41
+ ELEVENLABS_VOICE_ID = "IvLWq57RKibBrqZGpQrC" # <-- your voice
42
+
43
  PLANNER_MODEL = "gpt-4o-mini"
44
  ANSWERER_MODEL = "gpt-4o"
45
+
46
  TABLE_DESCRIPTIONS = """
47
  - "ongoing_projects_source": Details about projects currently under construction.
48
  - "upcoming_projects_source": Information on future planned projects.
 
52
  - "feedback_source": Customer feedback and ratings for projects.
53
  """
54
 
55
+ # --------------------------------------------------------------------------- #
56
+ # CLIENTS
57
+ # --------------------------------------------------------------------------- #
58
  embeddings = None
59
  vector_store = None
60
  client_openai = OpenAI(api_key=OPENAI_API_KEY)
61
+ client_elevenlabs = None
62
 
63
+ # ---- ElevenLabs init with detailed logging ---------------------------------
64
  try:
65
+ key_preview = (
66
+ f"{ELEVENLABS_API_KEY[:5]}...{ELEVENLABS_API_KEY[-4:]}"
67
+ if ELEVENLABS_API_KEY and len(ELEVENLABS_API_KEY) > 9
68
+ else "None"
69
+ )
70
+ logging.info(f"Initializing ElevenLabs client with key: {key_preview}")
71
 
 
72
  if not ELEVENLABS_API_KEY:
73
+ raise ValueError("ELEVENLABS_API_KEY is missing or empty.")
74
 
75
  client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
76
+ logging.info(f"ElevenLabs client created – type: {type(client_elevenlabs)}")
77
 
78
+ # Verify we can list voices (optional, but proves the key works)
 
79
  voices = client_elevenlabs.voices.get_all()
80
+ logging.info(f"Fetched {len(voices.voices)} voices from ElevenLabs.")
81
 
82
  except Exception as e:
83
+ logging.error(f"ElevenLabs init failed: {e}", exc_info=True)
84
+ client_elevenlabs = None
85
+
86
+ # ---- Log SDK version -------------------------------------------------------
87
+ try:
88
+ import elevenlabs
89
 
90
+ logging.info(f"elevenlabs SDK version: {elevenlabs.__version__}")
91
+ except Exception:
92
+ logging.error("Could not import elevenlabs package.")
93
 
94
+ # --------------------------------------------------------------------------- #
95
+ # FASTAPI APP
96
+ # --------------------------------------------------------------------------- #
97
  @asynccontextmanager
98
  async def lifespan(app: FastAPI):
99
  global embeddings, vector_store
 
111
  yield
112
  logging.info("Shutting down.")
113
 
 
 
 
 
 
 
 
114
 
115
  app = FastAPI(lifespan=lifespan)
116
 
117
+ # --------------------------------------------------------------------------- #
118
+ # PROMPTS
119
+ # --------------------------------------------------------------------------- #
120
  QUERY_FORMULATION_PROMPT = """
121
  You are a query analysis agent. Transform the user's query into a precise search query and determine the correct table to filter by.
122
  **Available Tables:**
 
127
  2. If status keywords (ongoing, completed, upcoming, etc.) are present, pick the matching table.
128
  3. If no status keyword, set filter_table to null.
129
  4. Return JSON: {{"search_query": "...", "filter_table": "table_name or null"}}
130
+ """.strip()
131
 
132
  ANSWER_SYSTEM_PROMPT = """
133
  You are an expert AI assistant for a premier real estate developer.
 
139
  1. Match user language (Hinglish β†’ Hinglish, English β†’ English).
140
  2. Use CONTEXT if available, else use core knowledge.
141
  3. Only answer real estate questions.
142
+ """.strip()
 
143
 
144
+ # --------------------------------------------------------------------------- #
145
+ # AUDIO & LLM HELPERS
146
+ # --------------------------------------------------------------------------- #
147
  def transcribe_audio(audio_path: str, audio_bytes: bytes) -> str:
148
  for attempt in range(3):
149
  try:
150
  audio_file = io.BytesIO(audio_bytes)
151
+ filename = os.path.basename(audio_path)
 
 
152
 
153
+ logging.info(f"Transcribing {filename} ({len(audio_bytes)} bytes)")
154
  transcript = client_openai.audio.transcriptions.create(
155
  model="whisper-1",
156
+ file=(filename, audio_file),
157
  )
158
  text = transcript.text.strip()
159
 
160
  # Hinglish transliteration
161
+ if re.search(r"[\u0900-\u097F]", text):
162
+ resp = client_openai.chat.completions.create(
163
  model="gpt-4o-mini",
164
+ messages=[
165
+ {"role": "user", "content": f"Transliterate to Roman (Hinglish): {text}"}
166
+ ],
167
+ temperature=0.0,
168
  )
169
+ text = resp.choices[0].message.content.strip()
170
 
171
  logging.info(f"Transcribed: {text}")
172
  return text
 
173
  except Exception as e:
174
+ logging.error(f"Transcription error (attempt {attempt + 1}): {e}", exc_info=True)
175
  if attempt == 2:
176
  return ""
177
  return ""
178
 
179
+
180
def generate_elevenlabs_sync(text: str) -> bytes:
    """Synthesise speech for ``text`` with the hard-coded ElevenLabs voice.

    Calls ``client.text_to_speech.convert`` with ``ELEVENLABS_VOICE_ID`` and
    collects the streamed chunks into a single MP3 byte string. The call is
    attempted up to three times.

    Args:
        text: The text to speak.

    Returns:
        The MP3 audio bytes, or ``b""`` when the client was never initialised
        or all three attempts failed.
    """
    if client_elevenlabs is None:
        logging.error("ElevenLabs client not initialized – skipping TTS.")
        return b""

    for attempt in range(3):
        try:
            logging.info("Calling ElevenLabs text_to_speech.convert...")
            stream = client_elevenlabs.text_to_speech.convert(
                voice_id=ELEVENLABS_VOICE_ID,
                text=text,
                # The SDK keyword is `model_id`; `model` belonged to the old
                # `generate()` helper and is rejected by `convert`.
                model_id="eleven_multilingual_v2",
                output_format="mp3_44100_128",
            )
            # The SDK yields the audio in chunks; join once instead of
            # repeatedly re-allocating with `+=`.
            audio_bytes = b"".join(chunk for chunk in stream if chunk)
            logging.info(f"TTS returned {len(audio_bytes)} bytes.")
            return audio_bytes
        except Exception as e:
            logging.error(
                f"ElevenLabs TTS error (attempt {attempt + 1}): {e}", exc_info=True
            )
    # All attempts raised.
    return b""
212
+
213
 
214
  async def formulate_search_plan(user_query: str) -> dict:
215
+ logging.info(f"Formulating search plan for: {user_query}")
216
  for attempt in range(3):
217
  try:
218
+ formatted = QUERY_FORMULATION_PROMPT.format(
219
+ table_descriptions=TABLE_DESCRIPTIONS, user_query=user_query
 
 
220
  )
221
+ resp = await run_in_threadpool(
 
222
  client_openai.chat.completions.create,
223
  model=PLANNER_MODEL,
224
+ messages=[{"role": "user", "content": formatted}],
225
  response_format={"type": "json_object"},
226
+ temperature=0.0,
227
  )
228
+ raw = resp.choices[0].message.content
229
+ logging.info(f"Planner raw response: {raw}")
230
+ plan = json.loads(raw)
231
+ logging.info(f"Parsed plan: {plan}")
 
 
 
232
  return plan
233
  except Exception as e:
234
+ logging.error(f"Planner error (attempt {attempt + 1}): {e}", exc_info=True)
 
235
  if attempt == 2:
 
236
  return {"search_query": user_query, "filter_table": None}
 
 
237
  return {"search_query": user_query, "filter_table": None}
238
 
239
+
240
  async def get_agent_response(user_text: str) -> str:
241
  for attempt in range(3):
242
  try:
243
  plan = await formulate_search_plan(user_text)
244
+ search_q = plan.get("search_query", user_text)
245
+ filter_tbl = plan.get("filter_table")
246
+ search_filter = {"source_table": filter_tbl} if filter_tbl else {}
247
 
248
  docs = await run_in_threadpool(
249
  vector_store.similarity_search,
250
+ search_q,
251
+ k=3,
252
+ filter=search_filter,
253
  )
254
  if not docs:
255
+ docs = await run_in_threadpool(vector_store.similarity_search, search_q, k=3)
256
 
257
+ context = "\n\n".join(d.page_content for d in docs)
258
 
259
+ resp = await run_in_threadpool(
260
  client_openai.chat.completions.create,
261
  model=ANSWERER_MODEL,
262
  messages=[
263
  {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
264
  {"role": "system", "content": f"CONTEXT:\n{context}"},
265
+ {"role": "user", "content": f"Question: {user_text}"},
266
+ ],
267
  )
268
+ return resp.choices[0].message.content.strip()
269
  except Exception as e:
270
+ logging.error(f"RAG error (attempt {attempt + 1}): {e}", exc_info=True)
271
  if attempt == 2:
272
  return "Sorry, I couldn't respond. Please try again."
273
  return "Sorry, I couldn't respond."
274
 
275
 
276
+ # --------------------------------------------------------------------------- #
277
+ # AUTH ENDPOINT
278
+ # --------------------------------------------------------------------------- #
279
  class TextQuery(BaseModel):
280
  query: str
281
 
282
+
283
  async def verify_token(x_auth_token: str = Header(...)):
284
  if not SHARED_SECRET or x_auth_token != SHARED_SECRET:
285
  logging.warning("Auth failed for /test-text-query")
286
  raise HTTPException(status_code=401, detail="Invalid token")
287
  logging.info("Auth passed")
288
 
289
+
290
  @app.post("/test-text-query", dependencies=[Depends(verify_token)])
291
  async def test_text_query_endpoint(query: TextQuery):
292
  logging.info(f"Text query: {query.query}")
 
294
  return {"response": response}
295
 
296
 
297
+ # --------------------------------------------------------------------------- #
298
+ # GRADIO PIPELINE
299
+ # --------------------------------------------------------------------------- #
300
  async def process_audio(audio_path):
301
  if not audio_path or not os.path.exists(audio_path):
302
  return None, "No valid audio file received."
303
 
304
  try:
305
+ # ---- 1. READ RAW BYTES ------------------------------------------------
306
  with open(audio_path, "rb") as f:
307
  audio_bytes = f.read()
308
+ if not audio_bytes:
 
309
  return None, "Empty audio file."
310
 
311
+ # ---- 2. TRANSCRIBE ----------------------------------------------------
312
  user_text = await run_in_threadpool(transcribe_audio, audio_path, audio_bytes)
313
  if not user_text:
314
  return None, "Couldn't understand audio. Try again."
315
 
316
  logging.info(f"User: {user_text}")
317
 
318
+ # ---- 3. GET AI RESPONSE -----------------------------------------------
319
  agent_response = await get_agent_response(user_text)
320
  if not agent_response:
321
  return None, "No response generated."
322
 
323
  logging.info(f"AI: {agent_response[:100]}...")
324
 
325
+ # ---- 4. TEXT-TO-SPEECH ------------------------------------------------
326
+ ai_audio_bytes = await run_in_threadpool(generate_elevenlabs_sync, agent_response)
 
 
327
  if not ai_audio_bytes:
328
+ logging.error("TTS failed – returning text only.")
329
+ return (
330
+ None,
331
+ f"**You:** {user_text}\n\n**AI:** {agent_response}\n\n_(Audio generation failed)_",
332
+ )
333
 
334
+ # Save to a temporary file for Gradio
335
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
336
  f.write(ai_audio_bytes)
337
  out_path = f.name
338
+ logging.info(f"Saved TTS audio to {out_path}")
 
339
 
340
  return out_path, f"**You:** {user_text}\n\n**AI:** {agent_response}"
341
 
342
  except Exception as e:
343
+ logging.error(f"Audio processing error: {e}", exc_info=True)
344
  return None, f"Error: {str(e)}"
345
 
346
 
347
+ # --------------------------------------------------------------------------- #
348
+ # GRADIO UI
349
+ # --------------------------------------------------------------------------- #
350
  with gr.Blocks(title="Real Estate AI") as demo:
351
  gr.Markdown("# Real Estate Voice Assistant")
352
  gr.Markdown("Ask about projects in Pune, Mumbai, Bengaluru, etc.")
 
357
 
358
  out_text = gr.Textbox(label="Conversation", lines=8)
359
 
360
+ inp.change(process_audio, inputs=inp, outputs=[out_audio, out_text])
361
 
362
+ # No examples – they caused FileNotFound errors when clicking text.
 
363
 
364
 
365
+ # --------------------------------------------------------------------------- #
366
+ # MOUNT GRADIO
367
+ # --------------------------------------------------------------------------- #
368
  app = gr.mount_gradio_app(app, demo, path="/")