Spaces:

bravedims
/

AI_Avatar_Chat

Running

AI_Avatar_Chat / robust_tts_client.py

bravedims

Fix TTS generation errors with robust fallback client

0c8ed18 3 months ago

6.36 kB

	import asyncio
	import logging
	import os
	import tempfile
	import zlib
	from typing import Optional

	import numpy as np
	import soundfile as sf
	import torch

	# Module-level logger named after this module; no handlers or levels are
	# configured here.
	logger = logging.getLogger(__name__)

	class RobustTTSClient:
	    """Placeholder text-to-speech client that synthesizes tones, not speech.

	    No TTS model is loaded and no network call is made: every request is
	    answered with a locally generated sine-wave tone written to a WAV file.
	    The client still relies on ``numpy`` for synthesis, ``soundfile`` for
	    writing the WAV file and ``torch`` for device detection.
	    """

	    SAMPLE_RATE = 22050        # Hz, used for every generated clip
	    BASE_FREQUENCY = 440       # Hz, A4
	    MIN_DURATION = 2.0         # seconds
	    MAX_DURATION = 15.0        # seconds
	    SECONDS_PER_CHAR = 0.08    # clip length grows with the text length
	    FADE_SECONDS = 0.1         # fade-in / fade-out length
	    FALLBACK_DURATION = 2.0    # seconds, length of the plain fallback beep

	    # Pitch multiplier applied to BASE_FREQUENCY per voice id; unknown ids
	    # (and None) use 1.0.
	    VOICE_MULTIPLIERS = {
	        "21m00Tcm4TlvDq8ikWAM": 1.0,   # Female (higher)
	        "pNInz6obpgDQGcFmaJgB": 0.75,  # Male (lower)
	        "EXAVITQu4vr4xnSDxMaL": 1.1,   # Sweet female
	        "ErXwobaYiN019PkySvjV": 0.8,   # Professional male
	        "TxGEqnHWrfWFTfGW9XjX": 0.65,  # Deep male
	        "yoZ06aMxZJJ28mfd3POQ": 0.9,   # Friendly
	        "AZnzlk1XvdvUeBnXmlld": 1.05,  # Strong female
	    }

	    def __init__(self):
	        """Record the available torch device and mark the client as not loaded."""
	        self.device = "cuda" if torch.cuda.is_available() else "cpu"
	        self.model_loaded = False

	        logger.info("Robust TTS Client initialized on device: %s", self.device)

	    async def load_model(self) -> bool:
	        """Mark the client as ready; there is no real model to load.

	        Returns:
	            Always ``True``.
	        """
	        logger.info("Setting up robust placeholder TTS...")
	        self.model_loaded = True
	        logger.info("✅ Robust TTS ready (placeholder audio mode)")
	        return True

	    @staticmethod
	    def _variation_frequency(text: str) -> int:
	        """Map *text* (case-insensitively) to a stable frequency in 50-249 Hz.

	        A CRC32 checksum is used instead of the builtin ``hash()`` because
	        string hashes are salted per process, which would make the same text
	        sound different on every run.
	        """
	        digest = zlib.crc32(text.lower().encode("utf-8", errors="replace"))
	        return 50 + (digest % 1000) % 200

	    def _fallback_beep(self) -> "tuple[np.ndarray, int]":
	        """Return a plain 440 Hz beep of ``FALLBACK_DURATION`` seconds."""
	        sample_rate = self.SAMPLE_RATE
	        duration = self.FALLBACK_DURATION
	        t = np.linspace(0, duration, int(sample_rate * duration), False)
	        return 0.3 * np.sin(2 * np.pi * 440 * t), sample_rate

	    @staticmethod
	    def _write_wav(audio_data, sample_rate: int) -> str:
	        """Write *audio_data* to a new temporary ``.wav`` file and return its path.

	        The file is not deleted automatically; the caller owns it. If writing
	        fails, the empty temporary file is removed before the error is
	        re-raised.
	        """
	        # Reserve a unique path, then close the handle before writing: a file
	        # that is still open cannot be reopened by name on Windows.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
	            wav_path = handle.name
	        try:
	            sf.write(wav_path, audio_data, samplerate=sample_rate)
	        except Exception:
	            try:
	                os.remove(wav_path)
	            except OSError:
	                pass  # best effort; the write error below is what matters
	            raise
	        return wav_path

	    def generate_tone_audio(
	        self, text: str, voice_id: Optional[str] = None
	    ) -> "tuple[np.ndarray, int]":
	        """Synthesize a placeholder tone whose length and pitch depend on the input.

	        Args:
	            text: Text being "spoken". Its length sets the clip duration
	                (0.08 s per character, clamped to 2-15 s) and its content
	                adds a low-amplitude text-dependent component.
	            voice_id: Optional key into ``VOICE_MULTIPLIERS`` selecting the
	                pitch; unknown or missing ids use the base pitch.

	        Returns:
	            ``(audio_data, sample_rate)``: a 1-D float array normalized to a
	            peak of 1.0, and the sample rate in Hz. If synthesis fails for any
	            reason, a plain 2-second 440 Hz beep (peak 0.3) is returned.
	        """
	        try:
	            duration = max(
	                self.MIN_DURATION,
	                min(len(text) * self.SECONDS_PER_CHAR, self.MAX_DURATION),
	            )
	            sample_rate = self.SAMPLE_RATE
	            t = np.linspace(0, duration, int(sample_rate * duration), False)

	            frequency = self.BASE_FREQUENCY * self.VOICE_MULTIPLIERS.get(voice_id, 1.0)

	            # Fundamental plus two overtones for a less sterile sound.
	            audio_data = 0.3 * np.sin(2 * np.pi * frequency * t)
	            audio_data += 0.15 * np.sin(2 * np.pi * frequency * 2 * t)  # 2nd harmonic (octave)
	            audio_data += 0.1 * np.sin(2 * np.pi * frequency * 3 * t)   # 3rd harmonic (octave + fifth)

	            # Text-dependent component so different texts sound different.
	            variation_freq = self._variation_frequency(text)
	            audio_data += 0.05 * np.sin(2 * np.pi * variation_freq * t)

	            # Linear fade in/out to avoid clicks at the clip boundaries.
	            fade_samples = int(self.FADE_SECONDS * sample_rate)
	            if len(audio_data) > 2 * fade_samples:
	                audio_data[:fade_samples] *= np.linspace(0, 1, fade_samples)
	                audio_data[-fade_samples:] *= np.linspace(1, 0, fade_samples)

	            # Normalize to a peak of 1.0; skip when the signal is all zeros.
	            peak = np.max(np.abs(audio_data))
	            if peak > 0:
	                audio_data = audio_data / peak

	            return audio_data, sample_rate

	        except Exception as exc:
	            logger.error("Error in tone generation: %s", exc)
	            return self._fallback_beep()

	    async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
	        """Render *text* as placeholder audio and return the path of the WAV file.

	        Args:
	            text: Text to "speak".
	            voice_id: Optional voice profile id, see ``generate_tone_audio``.

	        Returns:
	            Path of a temporary ``.wav`` file. The file is not deleted by this
	            class; the caller is responsible for removing it.

	        Raises:
	            RuntimeError: If both the normal path and the last-resort beep
	                fail to produce a file.
	        """
	        if not self.model_loaded:
	            logger.info("TTS not loaded, loading now...")
	            if not await self.load_model():
	                logger.error("TTS loading failed, but continuing with basic audio")

	        try:
	            logger.info("Generating audio for text: %s...", text[:50])
	            logger.info("Using voice profile: %s", voice_id or "default")

	            audio_data, sample_rate = self.generate_tone_audio(text, voice_id)
	            wav_path = self._write_wav(audio_data, sample_rate)

	            logger.info("✅ Generated audio file: %s", wav_path)
	            logger.info(
	                "📊 Audio details: %.1fs, %sHz",
	                len(audio_data) / sample_rate,
	                sample_rate,
	            )
	            logger.warning("🔊 Using placeholder audio - Real TTS coming in future update")
	            return wav_path

	        except Exception as exc:
	            logger.error("❌ Critical error in audio generation: %s", exc)
	            logger.error("Exception type: %s", type(exc).__name__)

	        # Last resort: a minimal 2-second sine wave.
	        try:
	            audio_data, sample_rate = self._fallback_beep()
	            wav_path = self._write_wav(audio_data, sample_rate)
	        except Exception as final_error:
	            logger.error("❌ Even fallback audio failed: %s", final_error)
	            raise RuntimeError(f"Complete TTS failure: {final_error}") from final_error

	        logger.info("✅ Created fallback audio: %s", wav_path)
	        return wav_path