""" Text-to-Speech (TTS) Service using Deepgram API """ import requests import os import base64 from src.utils.logger import logger from typing import Optional class TTSService: """Service for handling text-to-speech conversion using Deepgram API""" def __init__(self): self.api_key = os.getenv("YOUR_DEEPGRAM_API_KEY") self.base_url = "https://api.deepgram.com/v1/speak" self.default_model = "aura-2-thalia-en" if not self.api_key: logger.error("Deepgram API key not found in environment variables") raise ValueError("Deepgram API key is required") async def text_to_speech( self, text: str, model: Optional[str] = None, format: str = "mp3" ) -> Optional[dict]: """ Convert text to speech using Deepgram API Args: text (str): The text to convert to speech model (str): The TTS model to use (default: aura-2-thalia-en) format (str): Audio format (default: mp3) Returns: dict: Contains audio data and metadata, or None if failed """ try: if not text or not text.strip(): logger.warning("Empty text provided for TTS conversion") return None # Clean and prepare text cleaned_text = text.strip() if len(cleaned_text) > 2000: # Limit text length for TTS cleaned_text = cleaned_text[:2000] + "..." logger.warning(f"Text truncated to 2000 characters for TTS") # Prepare request url = self.base_url querystring = {"model": model or self.default_model} payload = {"text": cleaned_text} headers = { "Authorization": f"Token {self.api_key}", "Content-Type": "application/json" } logger.info(f"Converting text to speech: {cleaned_text[:100]}...") # Make request to Deepgram API response = requests.post( url, json=payload, headers=headers, params=querystring, timeout=30 ) if response.status_code == 200: # Encode audio data as base64 audio_data = response.content audio_base64 = base64.b64encode(audio_data).decode('utf-8') # Determine MIME type based on format mime_type = f"audio/{format}" if format == "mp3": mime_type = "audio/mpeg" elif format == "wav": mime_type = "audio/wav" result = { "audio_data": audio_base64, "mime_type": mime_type, "format": format, "text": cleaned_text, "model": model or self.default_model, "size_bytes": len(audio_data) } logger.info(f"TTS conversion successful: {len(audio_data)} bytes") return result else: logger.error(f"Deepgram TTS API error: {response.status_code} - {response.text}") return None except requests.exceptions.Timeout: logger.error("TTS request timed out") return None except requests.exceptions.RequestException as e: logger.error(f"TTS request failed: {str(e)}") return None except Exception as e: logger.error(f"Unexpected error in TTS conversion: {str(e)}") return None def is_available(self) -> bool: """Check if TTS service is available""" return bool(self.api_key) # Global TTS service instance tts_service = TTSService()