Arnavkumar01 committed on
Commit
b46da1b
·
1 Parent(s): f65990c

change in main.py to explicitly tell OpenAI what the format for audio is

Browse files
Files changed (1) hide show
  1. main.py +24 -20
main.py CHANGED
@@ -101,15 +101,18 @@ You are an expert AI assistant for a premier real estate developer.
101
  """
102
 
103
 
104
- # --- AUDIO & LLM HELPERS ---
105
- def transcribe_audio(audio_bytes: bytes) -> str:
106
  for attempt in range(3):
107
  try:
108
  audio_file = io.BytesIO(audio_bytes)
109
- audio_file.name = "input.audio"
 
 
 
110
  transcript = client_openai.audio.transcriptions.create(
111
  model="whisper-1",
112
- file=audio_file
113
  )
114
  text = transcript.text.strip()
115
 
@@ -122,13 +125,16 @@ def transcribe_audio(audio_bytes: bytes) -> str:
122
  )
123
  text = response.choices[0].message.content.strip()
124
 
 
125
  return text
 
126
  except Exception as e:
127
  logging.error(f"Transcription error (attempt {attempt+1}): {e}")
128
  if attempt == 2:
129
  return ""
130
  return ""
131
 
 
132
  def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
133
  for attempt in range(3):
134
  try:
@@ -214,23 +220,23 @@ async def test_text_query_endpoint(query: TextQuery):
214
  return {"response": response}
215
 
216
 
217
- # --- GRADIO AUDIO PROCESSING (BOSS'S FIX) ---
218
  async def process_audio(audio_path):
219
- if not audio_path:
220
- return None, "No audio. Please speak."
221
 
222
  try:
223
- # BOSS'S GENIUS: Read raw bytes directly
224
  with open(audio_path, "rb") as f:
225
  audio_bytes = f.read()
226
 
227
  if len(audio_bytes) == 0:
228
- return None, "Empty audio."
229
 
230
- # 1. Transcribe
231
- user_text = await run_in_threadpool(transcribe_audio, audio_bytes)
232
  if not user_text:
233
- return None, "Couldn't understand. Try again."
234
 
235
  logging.info(f"User: {user_text}")
236
 
@@ -271,16 +277,14 @@ with gr.Blocks(title="Real Estate AI") as demo:
271
 
272
  out_text = gr.Textbox(label="Conversation", lines=8)
273
 
 
274
  inp.change(process_audio, inp, [out_audio, out_text])
275
 
276
- gr.Examples(
277
- examples=[
278
- ["Ongoing projects in Pune?"],
279
- ["Mumbai mein upcoming flats?"],
280
- ["Completed villas in Goa"]
281
- ],
282
- inputs=inp
283
- )
284
 
285
 
286
  # --- MOUNT GRADIO ---
 
101
  """
102
 
103
 
104
+ # --- FIXED: transcribe_audio accepts path + bytes ---
105
+ def transcribe_audio(audio_path: str, audio_bytes: bytes) -> str:
106
  for attempt in range(3):
107
  try:
108
  audio_file = io.BytesIO(audio_bytes)
109
+ filename = os.path.basename(audio_path) # e.g., "audio.wav"
110
+
111
+ logging.info(f"Transcribing audio: {filename} ({len(audio_bytes)} bytes)")
112
+
113
  transcript = client_openai.audio.transcriptions.create(
114
  model="whisper-1",
115
+ file=(filename, audio_file) # ← Critical: gives format hint
116
  )
117
  text = transcript.text.strip()
118
 
 
125
  )
126
  text = response.choices[0].message.content.strip()
127
 
128
+ logging.info(f"Transcribed: {text}")
129
  return text
130
+
131
  except Exception as e:
132
  logging.error(f"Transcription error (attempt {attempt+1}): {e}")
133
  if attempt == 2:
134
  return ""
135
  return ""
136
 
137
+
138
  def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
139
  for attempt in range(3):
140
  try:
 
220
  return {"response": response}
221
 
222
 
223
+ # --- FIXED: process_audio passes path + bytes ---
224
  async def process_audio(audio_path):
225
+ if not audio_path or not os.path.exists(audio_path):
226
+ return None, "No valid audio file received."
227
 
228
  try:
229
+ # Read raw bytes
230
  with open(audio_path, "rb") as f:
231
  audio_bytes = f.read()
232
 
233
  if len(audio_bytes) == 0:
234
+ return None, "Empty audio file."
235
 
236
+ # 1. Transcribe — pass path + bytes
237
+ user_text = await run_in_threadpool(transcribe_audio, audio_path, audio_bytes)
238
  if not user_text:
239
+ return None, "Couldn't understand audio. Try again."
240
 
241
  logging.info(f"User: {user_text}")
242
 
 
277
 
278
  out_text = gr.Textbox(label="Conversation", lines=8)
279
 
280
+ # Only trigger on real file (not example text)
281
  inp.change(process_audio, inp, [out_audio, out_text])
282
 
283
+ # --- FIXED: Examples now use real audio files (optional) ---
284
+ # Remove text examples to avoid FileNotFoundError
285
+ # Or: Record real .wav files and upload to repo
286
+ # For now: disable examples
287
+ # gr.Examples(examples=[], inputs=inp)
 
 
 
288
 
289
 
290
  # --- MOUNT GRADIO ---