# Hugging Face Space: bravedims / AI_Avatar_Chat (status: Running)
# File: AI_Avatar_Chat / minimal_tts_client.py
# Last commit (bravedims): "Fix Docker build error by removing problematic dependencies"
# Commit 1be0a9c, 3 months ago; file size 2.98 kB

	import asyncio
	import logging
	import os
	import tempfile
	from typing import Optional

	import numpy as np
	import soundfile as sf
	import torch
	from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

	logger = logging.getLogger(__name__)

	class MinimalTTSClient:
	    """Placeholder text-to-speech client that emits a synthetic tone.

	    No speech model is loaded or run by this class. ``text_to_speech``
	    writes a short, amplitude-modulated sine wave whose length and pitch
	    depend on the length of the input text, so callers receive a playable
	    WAV file while real TTS is unavailable.
	    """

	    # Parameters of the placeholder tone.
	    SAMPLE_RATE = 16000       # samples per second written to the WAV file
	    SECONDS_PER_CHAR = 0.1    # audio length contributed by each character
	    MIN_DURATION = 0.1        # seconds; keeps empty text from yielding 0 samples
	    MAX_DURATION = 10.0       # seconds; upper bound regardless of text length

	    def __init__(self):
	        """Record the available torch device and start in the unloaded state."""
	        self.device = "cuda" if torch.cuda.is_available() else "cpu"
	        self.model_loaded = False

	        logger.info(f"Minimal TTS Client initialized on device: {self.device}")

	    async def load_model(self) -> bool:
	        """Mark the placeholder backend as ready.

	        Nothing is actually loaded; the method only flips ``model_loaded``.

	        Returns:
	            True once the client is ready, False if setup raised.
	        """
	        try:
	            logger.info("Setting up minimal TTS...")

	            # The mock backend needs no weights, which sidesteps the model
	            # loading problems this client exists to avoid.
	            self.model_loaded = True
	            logger.info("✅ Minimal TTS ready")
	            return True

	        except Exception as e:
	            logger.error(f"❌ Failed to load TTS: {e}")
	            return False

	    @classmethod
	    def _placeholder_waveform(cls, text: str):
	        """Build the placeholder tone for *text*.

	        Returns:
	            A ``(samples, sample_rate)`` tuple, where ``samples`` is a 1-D
	            NumPy float array with peak amplitude of at most 0.13.
	        """
	        n_chars = len(text)

	        # 0.1 s per character, clamped so that empty text still produces
	        # audible output and long text never exceeds MAX_DURATION.
	        duration = min(
	            max(n_chars * cls.SECONDS_PER_CHAR, cls.MIN_DURATION),
	            cls.MAX_DURATION,
	        )
	        t = np.linspace(0, duration, int(cls.SAMPLE_RATE * duration), False)

	        # Pitch varies slightly with text length: 440 Hz up to 638 Hz.
	        frequency = 440 + (n_chars % 100) * 2
	        carrier = 0.1 * np.sin(2 * np.pi * frequency * t)

	        # A 2 Hz tremolo makes the tone less monotonous.
	        tremolo = 1 + 0.3 * np.sin(2 * np.pi * 2 * t)
	        return carrier * tremolo, cls.SAMPLE_RATE

	    @staticmethod
	    def _write_wav(audio_data, sample_rate: int) -> str:
	        """Write *audio_data* to a new temporary ``.wav`` file and return its path."""
	        # Reserve a unique path and close the handle before soundfile opens
	        # the file by name; an open handle blocks a second open on Windows.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as handle:
	            wav_path = handle.name

	        try:
	            sf.write(wav_path, audio_data, samplerate=sample_rate)
	        except Exception:
	            # delete=False means nobody else will remove a half-written file.
	            try:
	                os.unlink(wav_path)
	            except OSError:
	                logger.debug(f"Could not remove temporary file: {wav_path}")
	            raise
	        return wav_path

	    async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
	        """Generate placeholder audio for *text* and return the WAV file path.

	        Args:
	            text: Text to "speak"; only its length influences the audio.
	            voice_id: Accepted for interface compatibility; currently ignored.

	        Returns:
	            Path of a temporary ``.wav`` file. The file is created with
	            ``delete=False`` and is not removed by this class.

	        Raises:
	            Exception: If the backend cannot be loaded or the audio cannot
	                be generated or written (the original error is chained).
	        """
	        if not self.model_loaded:
	            logger.info("TTS not loaded, loading now...")
	            if not await self.load_model():
	                raise Exception("Failed to load TTS")

	        try:
	            logger.info(f"Generating minimal audio for text: {text[:50]}...")
	            audio_data, sample_rate = self._placeholder_waveform(text)
	            wav_path = self._write_wav(audio_data, sample_rate)
	        except Exception as e:
	            logger.error(f"❌ Error generating audio: {e}")
	            raise Exception(f"Audio generation failed: {e}") from e

	        logger.info(f"✅ Generated placeholder audio: {wav_path}")
	        logger.warning("📢 Using placeholder audio - TTS will be improved in next update")
	        return wav_path