qwvdggg committed
Commit 2b1c44a · verified · 1 Parent(s): f1feb06

Create app.py

Files changed (1)
app.py +616 -0
app.py ADDED
@@ -0,0 +1,616 @@
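# app.py — Flask voice-assistant backend: speech-to-text (Whisper-tiny),
# response generation (FLAN-T5-base), and text-to-speech (gTTS), tuned for
# CPU-only Hugging Face Spaces. Clients POST raw 16 kHz mono audio and fetch
# the synthesized reply from /stream_audio/<file_id>.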
from flask import Flask, request, jsonify, Response, send_file
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import logging
import io
import numpy as np
import scipy.io.wavfile as wavfile
import soundfile as sf
from pydub import AudioSegment
import time
from functools import lru_cache
import gc
import psutil
import threading
from queue import Queue
import uuid
import subprocess
import tempfile
import atexit

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

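# Detect Hugging Face Spaces via the SPACE_ID env var and budget CPU threads
# accordingly; HF_TOKEN (if set) is forwarded to the Hub downloads below.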
IS_HF_SPACE = os.environ.get('SPACE_ID') is not None
HF_TOKEN = os.environ.get('HF_TOKEN')

if IS_HF_SPACE:
    device = "cpu"
    torch.set_num_threads(2)
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'
    logger.info("Running on Hugging Face Spaces - CPU optimized mode")
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch.set_num_threads(4)

logger.info(f"Using device: {device}")

app = Flask(__name__)
app.config['TEMP_AUDIO_DIR'] = '/tmp/audio_responses'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024

stt_pipeline = None
llm_model = None
llm_tokenizer = None
tts_pipeline = None
tts_type = None

active_files = {}
file_cleanup_lock = threading.Lock()
cleanup_thread = None

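# Background janitor: every 60 s, delete generated audio files older than
# 5 minutes (300 s) so /tmp does not fill up between client downloads.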
def cleanup_old_files():
    while True:
        try:
            with file_cleanup_lock:
                current_time = time.time()
                files_to_remove = []

                for file_id, file_info in list(active_files.items()):
                    if current_time - file_info['created_time'] > 300:
                        files_to_remove.append(file_id)

                for file_id in files_to_remove:
                    try:
                        if os.path.exists(active_files[file_id]['filepath']):
                            os.remove(active_files[file_id]['filepath'])
                        del active_files[file_id]
                        logger.info(f"Cleaned up file: {file_id}")
                    except Exception as e:
                        logger.warning(f"Cleanup error for {file_id}: {e}")
        except Exception as e:
            logger.error(f"Cleanup thread error: {e}")

        time.sleep(60)

def start_cleanup_thread():
    global cleanup_thread
    if cleanup_thread is None or not cleanup_thread.is_alive():
        cleanup_thread = threading.Thread(target=cleanup_old_files, daemon=True)
        cleanup_thread.start()
        logger.info("Cleanup thread started")

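# Best-effort removal of every temp file and the whole audio directory;
# registered with atexit below so it runs on normal interpreter shutdown.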
def cleanup_all_files():
    try:
        with file_cleanup_lock:
            for file_id, file_info in active_files.items():
                try:
                    if os.path.exists(file_info['filepath']):
                        os.remove(file_info['filepath'])
                except Exception:
                    pass
            active_files.clear()

        if os.path.exists(app.config['TEMP_AUDIO_DIR']):
            import shutil
            shutil.rmtree(app.config['TEMP_AUDIO_DIR'], ignore_errors=True)

        logger.info("All temporary files cleaned up")
    except Exception as e:
        logger.warning(f"Final cleanup error: {e}")

atexit.register(cleanup_all_files)

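# Snapshot of process and system memory (via psutil), logged around each
# model load and request.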
def get_memory_usage():
    try:
        process = psutil.Process(os.getpid())
        memory_info = process.memory_info()
        return {
            "rss_mb": memory_info.rss / 1024 / 1024,
            "vms_mb": memory_info.vms / 1024 / 1024,
            "available_mb": psutil.virtual_memory().available / 1024 / 1024,
            "percent": psutil.virtual_memory().percent
        }
    except Exception as e:
        logger.warning(f"Memory info error: {e}")
        return {"rss_mb": 0, "vms_mb": 0, "available_mb": 0, "percent": 0}

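# Load the three models in sequence, logging memory after each stage. STT and
# LLM failures abort startup; TTS falls back to silence when gTTS is missing,
# so the rest of the pipeline stays usable.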
def initialize_models():
    global stt_pipeline, llm_model, llm_tokenizer, tts_pipeline, tts_type

    try:
        logger.info(f"Initial memory usage: {get_memory_usage()}")

        if stt_pipeline is None:
            logger.info("Loading Whisper-tiny STT model...")
            try:
                stt_pipeline = pipeline(
                    "automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    device=device,
                    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
                    token=HF_TOKEN,
                    return_timestamps=False
                )
                logger.info("✅ STT model loaded successfully")
            except Exception as e:
                logger.error(f"STT loading failed: {e}")
                raise

        gc.collect()
        logger.info(f"STT loaded. Memory: {get_memory_usage()}")

        if llm_model is None:
            logger.info("Loading FLAN-T5-base LLM...")
            try:
                model_name = "google/flan-t5-base"

                llm_tokenizer = AutoTokenizer.from_pretrained(
                    model_name,
                    token=HF_TOKEN,
                    trust_remote_code=True
                )

                llm_model = AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
                    token=HF_TOKEN,
                    trust_remote_code=True
                ).to(device)

                if llm_tokenizer.pad_token is None:
                    llm_tokenizer.pad_token = llm_tokenizer.eos_token

                logger.info("✅ LLM model loaded successfully")
            except Exception as e:
                logger.error(f"LLM loading failed: {e}")
                raise

        gc.collect()
        logger.info(f"LLM loaded. Memory: {get_memory_usage()}")

        if tts_pipeline is None:
            logger.info("Loading TTS model...")
            tts_loaded = False

            try:
                from gtts import gTTS
                tts_pipeline = "gtts"
                tts_type = "gtts"
                tts_loaded = True
                logger.info("✅ Using gTTS (Google Text-to-Speech)")
            except ImportError:
                logger.warning("gTTS not available")

            if not tts_loaded:
                tts_pipeline = "silent"
                tts_type = "silent"
                logger.warning("Using silent fallback for TTS")

        gc.collect()
        logger.info(f"TTS loaded. Memory: {get_memory_usage()}")

        logger.info("🎉 All models loaded successfully!")
        start_cleanup_thread()

    except Exception as e:
        logger.error(f"❌ Model loading error: {e}")
        logger.error(f"Memory usage at error: {get_memory_usage()}")
        raise e

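# Small LRU cache over recent utterances: repeated (hash, text) pairs reuse
# the previous reply instead of re-running generate().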
@lru_cache(maxsize=32)
def cached_generate_response(text_hash, text):
    return generate_llm_response(text)

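# Single-turn reply generation. Input is clipped to 200 characters, sampled
# decoding is capped at 50 new tokens, and tensors are freed aggressively to
# keep a CPU Space within its memory budget.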
def generate_llm_response(text):
    try:
        if len(text) > 200:
            text = text[:200]

        if not text.strip():
            return "I'm listening. How can I help you?"

        inputs = llm_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(device)

        with torch.no_grad():
            is_seq2seq = getattr(getattr(llm_model, "config", {}), "is_encoder_decoder", False)

            gen_kwargs = dict(
                max_new_tokens=50,
                do_sample=True,
                temperature=0.7,
                top_k=50,
                top_p=0.9,
                no_repeat_ngram_size=2,
                early_stopping=True,
                pad_token_id=llm_tokenizer.eos_token_id if llm_tokenizer.pad_token_id is None else llm_tokenizer.pad_token_id,
                use_cache=True
            )

            if is_seq2seq:
                outputs_ids = llm_model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    **gen_kwargs
                )
            else:
                outputs_ids = llm_model.generate(
                    input_ids=input_ids,
                    **gen_kwargs
                )

        response = llm_tokenizer.decode(outputs_ids[0], skip_special_tokens=True)

        del inputs, input_ids, attention_mask, outputs_ids
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()

        response = response.strip()
        if not response or len(response) < 3:
            return "I understand. What else would you like to know?"

        return response

    except Exception as e:
        logger.error(f"LLM generation error: {e}", exc_info=True)
        return "I'm having trouble processing that. Could you try again?"

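# Expects raw 16-bit little-endian PCM at 16 kHz (a canonical 44-byte WAV
# header, if present, is stripped). Returns (rate, float32 samples in
# [-1, 1]) clamped to 0.5-30 s, or (None, None) if the clip is too short.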
def preprocess_audio_optimized(audio_bytes):
    try:
        logger.info(f"Processing audio: {len(audio_bytes)} bytes")

        if len(audio_bytes) > 44 and audio_bytes[:4] == b'RIFF':
            audio_bytes = audio_bytes[44:]  # skip the 44-byte WAV header
            logger.info("WAV header removed")

        audio_data = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0

        max_samples = 30 * 16000
        if len(audio_data) > max_samples:
            audio_data = audio_data[:max_samples]
            logger.info("Audio trimmed to 30 seconds")

        min_samples = int(0.5 * 16000)
        if len(audio_data) < min_samples:
            logger.warning(f"Audio too short: {len(audio_data)/16000:.2f} seconds")
            return None, None

        logger.info(f"Audio processed: {len(audio_data)/16000:.2f} seconds")
        return 16000, audio_data

    except Exception as e:
        logger.error(f"Audio preprocessing error: {e}")
        raise e

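# Synthesize the reply with gTTS, then transcode MP3 -> mono 16 kHz WAV via
# pydub (MP3 decoding requires ffmpeg). Falls back to half a second of
# silence when gTTS is unavailable, and to empty bytes on any other failure.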
def generate_tts_audio(text):
    try:
        text = text.replace('\n', ' ').strip()

        if len(text) > 200:
            text = text[:200] + "..."

        if not text:
            text = "I understand."

        logger.info(f"TTS generating: '{text[:50]}...'")

        if tts_type == "gtts":
            from gtts import gTTS

            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp_file:
                try:
                    tts = gTTS(text=text, lang='en', slow=False)
                    tts.save(tmp_file.name)

                    # Convert the MP3 to mono 16 kHz WAV for the client
                    audio_segment = AudioSegment.from_file(tmp_file.name, format="mp3")
                    audio_segment = audio_segment.set_frame_rate(16000).set_channels(1)
                    wav_buffer = io.BytesIO()
                    audio_segment.export(wav_buffer, format="wav")
                    wav_data = wav_buffer.getvalue()

                    os.unlink(tmp_file.name)

                    if len(wav_data) > 1000:
                        logger.info(f"TTS generated: {len(wav_data)} bytes")
                        return wav_data
                    raise Exception("Generated audio too small")

                except Exception as e:
                    if os.path.exists(tmp_file.name):
                        os.unlink(tmp_file.name)
                    raise e

        # Silent fallback: half a second of 16 kHz mono silence
        logger.warning("Using silent fallback")
        silence = AudioSegment.silent(duration=500, frame_rate=16000)
        wav_buffer = io.BytesIO()
        silence.export(wav_buffer, format="wav")
        return wav_buffer.getvalue()

    except Exception as e:
        logger.error(f"TTS error: {e}")
        return b''

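# Main pipeline endpoint: raw audio in the request body -> STT -> LLM -> TTS.
# The WAV reply is written under a UUID and returned indirectly via a
# stream_url, which keeps the JSON response small for constrained clients.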
@app.route('/process_audio', methods=['POST'])
def process_audio():
    start_time = time.time()

    if not all([stt_pipeline, llm_model, llm_tokenizer, tts_pipeline]):
        logger.error("Models not ready")
        return jsonify({"error": "Models are still loading, please wait..."}), 503

    if not request.data:
        return jsonify({"error": "No audio data received"}), 400

    if len(request.data) < 1000:
        return jsonify({"error": "Audio data too small"}), 400

    initial_memory = get_memory_usage()
    logger.info(f"🎯 Processing started. Memory: {initial_memory['rss_mb']:.1f}MB")

    try:
        logger.info("🎤 Converting speech to text...")
        stt_start = time.time()

        rate, audio_data = preprocess_audio_optimized(request.data)

        if audio_data is None:
            return jsonify({"error": "Invalid or too short audio"}), 400

        stt_result = stt_pipeline(
            {"sampling_rate": rate, "raw": audio_data},
            generate_kwargs={"language": "en"}
        )
        transcribed_text = stt_result.get('text', '').strip()

        del audio_data
        gc.collect()

        stt_time = time.time() - stt_start
        logger.info(f"✅ STT completed: '{transcribed_text}' ({stt_time:.2f}s)")

        if not transcribed_text or len(transcribed_text) < 2:
            transcribed_text = "Could you repeat that please?"

        logger.info("🤖 Generating AI response...")
        llm_start = time.time()

        text_hash = hash(transcribed_text.lower())
        assistant_response = cached_generate_response(text_hash, transcribed_text)

        llm_time = time.time() - llm_start
        logger.info(f"✅ LLM completed: '{assistant_response}' ({llm_time:.2f}s)")

        logger.info("🔊 Converting to speech...")
        tts_start = time.time()

        audio_response = generate_tts_audio(assistant_response)

        if not audio_response:
            return jsonify({"error": "TTS generation failed"}), 500

        tts_time = time.time() - tts_start
        total_time = time.time() - start_time

        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()

        final_memory = get_memory_usage()
        logger.info(f"✅ Processing complete! Total: {total_time:.2f}s (STT:{stt_time:.1f}s, LLM:{llm_time:.1f}s, TTS:{tts_time:.1f}s)")
        logger.info(f"Memory: {initial_memory['rss_mb']:.1f}MB → {final_memory['rss_mb']:.1f}MB")

        os.makedirs(app.config['TEMP_AUDIO_DIR'], exist_ok=True)

        file_id = str(uuid.uuid4())
        temp_filename = os.path.join(app.config['TEMP_AUDIO_DIR'], f"{file_id}.wav")
        with open(temp_filename, 'wb') as f:
            f.write(audio_response)

        with file_cleanup_lock:
            active_files[file_id] = {
                'filepath': temp_filename,
                'created_time': time.time(),
                'accessed': False
            }

        response_data = {
            'status': 'success',
            'file_id': file_id,
            'stream_url': f'/stream_audio/{file_id}',
            'message': assistant_response,
            'transcribed': transcribed_text,
            'processing_time': round(total_time, 2)
        }

        return jsonify(response_data)

    except Exception as e:
        logger.error(f"❌ Processing error: {e}", exc_info=True)
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()

        return jsonify({
            "error": "Processing failed",
            "details": str(e) if not IS_HF_SPACE else "Internal server error"
        }), 500

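# Serve a previously generated WAV by its UUID; files disappear once the
# cleanup thread reaps them (5 minutes after creation).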
@app.route('/stream_audio/<file_id>')
def stream_audio(file_id):
    try:
        with file_cleanup_lock:
            if file_id in active_files:
                active_files[file_id]['accessed'] = True
                filepath = active_files[file_id]['filepath']

                if os.path.exists(filepath):
                    logger.info(f"Streaming audio: {file_id}")
                    return send_file(
                        filepath,
                        mimetype='audio/wav',
                        as_attachment=False,
                        download_name='response.wav'
                    )

        logger.warning(f"Audio file not found: {file_id}")
        return jsonify({'error': 'File not found'}), 404

    except Exception as e:
        logger.error(f"Stream error: {e}")
        return jsonify({'error': 'Stream failed'}), 500

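# Two probes: /health reports per-model, memory, and cleanup state;
# /status is a minimal ready flag for polling clients.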
@app.route('/health', methods=['GET'])
def health_check():
    memory = get_memory_usage()

    status = {
        "status": "ready" if all([stt_pipeline, llm_model, llm_tokenizer, tts_pipeline]) else "loading",
        "models": {
            "stt": stt_pipeline is not None,
            "llm": llm_model is not None and llm_tokenizer is not None,
            "tts": tts_pipeline is not None,
            "tts_type": tts_type
        },
        "system": {
            "device": device,
            "is_hf_space": IS_HF_SPACE,
            "memory_mb": round(memory['rss_mb'], 1),
            "available_mb": round(memory['available_mb'], 1),
            "memory_percent": round(memory['percent'], 1)
        },
        "files": {
            "active_count": len(active_files),
            "cleanup_running": cleanup_thread is not None and cleanup_thread.is_alive()
        }
    }

    return jsonify(status)

@app.route('/status', methods=['GET'])
def simple_status():
    models_ready = all([stt_pipeline, llm_model, llm_tokenizer, tts_pipeline])
    return jsonify({"ready": models_ready})

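# Landing page: static HTML with a small script that polls /status every 5 s.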
@app.route('/', methods=['GET'])
def home():
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Voice AI Assistant</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            .status { font-size: 18px; margin: 20px 0; }
            .ready { color: green; }
            .loading { color: orange; }
            .error { color: red; }
            code { background: #f4f4f4; padding: 2px 5px; }
        </style>
    </head>
    <body>
        <h1>🎙️ Voice AI Assistant Server</h1>
        <div class="status">Status: <span id="status">Checking...</span></div>

        <h2>API Endpoints:</h2>
        <ul>
            <li><code>POST /process_audio</code> - Process audio (WAV format, max 16MB)</li>
            <li><code>GET /stream_audio/&lt;file_id&gt;</code> - Download audio response</li>
            <li><code>GET /health</code> - Detailed health check</li>
            <li><code>GET /status</code> - Simple ready status</li>
        </ul>

        <h2>Features:</h2>
        <ul>
            <li>Speech-to-Text (Whisper Tiny)</li>
            <li>AI Response Generation (FLAN-T5 Base)</li>
            <li>Text-to-Speech (gTTS)</li>
            <li>Automatic file cleanup</li>
            <li>Memory optimization</li>
        </ul>

        <p><em>Optimized for ESP32 and Hugging Face Spaces</em></p>

        <script>
            function updateStatus() {
                fetch('/status')
                    .then(r => r.json())
                    .then(d => {
                        const statusEl = document.getElementById('status');
                        if (d.ready) {
                            statusEl.textContent = '✅ Ready';
                            statusEl.className = 'ready';
                        } else {
                            statusEl.textContent = '⏳ Loading models...';
                            statusEl.className = 'loading';
                        }
                    })
                    .catch(() => {
                        document.getElementById('status').textContent = '❌ Error';
                        document.getElementById('status').className = 'error';
                    });
            }

            updateStatus();
            setInterval(updateStatus, 5000);
        </script>
    </body>
    </html>
    """

@app.errorhandler(Exception)
def handle_exception(e):
    logger.error(f"Unhandled exception: {e}", exc_info=True)
    return jsonify({"error": "Internal server error"}), 500

@app.errorhandler(413)
def handle_large_file(e):
    return jsonify({"error": "Audio file too large (max 16MB)"}), 413

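# Example client round-trip against the default port (7860); file names are
# hypothetical and the body is raw WAV bytes, not a multipart upload:
#   curl -X POST --data-binary @speech.wav http://localhost:7860/process_audio
#   # -> {"status": "success", "stream_url": "/stream_audio/<file_id>", ...}
#   curl -o reply.wav http://localhost:7860/stream_audio/<file_id>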
if __name__ == '__main__':
    try:
        logger.info("🚀 Starting Voice AI Assistant Server")
        logger.info(f"Environment: {'Hugging Face Spaces' if IS_HF_SPACE else 'Local'}")

        initialize_models()
        logger.info("🎉 Server ready!")

    except Exception as e:
        logger.error(f"❌ Startup failed: {e}")
        exit(1)

    port = int(os.environ.get('PORT', 7860))
    logger.info(f"🌐 Server starting on port {port}")

    app.run(
        host='0.0.0.0',
        port=port,
        debug=False,
        threaded=True,
        use_reloader=False
    )