"""
Text-to-Speech (TTS) Service using Deepgram API
"""

import asyncio
import base64
import functools
import os
import re
from typing import Optional

import requests

from src.utils.logger import logger

# Upper bound on the text sent for synthesis. The truncation suffix is counted
# inside this bound so the final payload never exceeds it.
_MAX_TTS_CHARS = 2000
_TRUNCATION_SUFFIX = "..."

# Markdown constructs that are unwrapped (or dropped) before synthesis.
# Order matters: bold must be handled before italic.
_MARKDOWN_RULES = (
    (re.compile(r'\*\*(.*?)\*\*'), r'\1'),      # bold **text**
    (re.compile(r'\*(.*?)\*'), r'\1'),          # italic *text*
    (re.compile(r'`(.*?)`'), r'\1'),            # code `text`
    (re.compile(r'#{1,6}\s'), ''),              # headers # ## ###
    (re.compile(r'\[(.*?)\]\(.*?\)'), r'\1'),   # links [text](url) -> text
)

# Emoji and pictographic ranges, removed in a single pass.
_EMOJI_RE = re.compile(
    '['
    '\U0001F600-\U0001F64F'  # emoticons
    '\U0001F300-\U0001F5FF'  # misc symbols and pictographs
    '\U0001F680-\U0001F6FF'  # transport & map
    '\U0001F1E0-\U0001F1FF'  # regional indicators
    '\u2600-\u26FF'          # misc symbols
    '\u2700-\u27BF'          # dingbats
    '\uFE00-\uFE0F'          # variation selectors
    '\U0001F900-\U0001F9FF'  # supplemental symbols
    ']'
)

# Typographic punctuation mapped to plain ASCII equivalents. Written with
# escapes so the mapping survives editors that normalize quote characters.
_PUNCTUATION_TABLE = str.maketrans({
    '\u201C': '"',    # left double quotation mark
    '\u201D': '"',    # right double quotation mark
    '\u2018': "'",    # left single quotation mark
    '\u2019': "'",    # right single quotation mark / apostrophe
    '\u2013': '-',    # en dash
    '\u2014': '-',    # em dash
    '\u2026': '...',  # horizontal ellipsis
    '\u00AB': '"',    # left angle quote
    '\u00BB': '"',    # right angle quote
    '\u2039': "'",    # single left angle quote
    '\u203A': "'",    # single right angle quote
})

_ZERO_WIDTH_RE = re.compile(r'[\u200B-\u200D\uFEFF]')

# Control characters EXCEPT tab, newline, vertical tab, form feed and carriage
# return (0x09-0x0D). Those are left for the whitespace collapse below so that
# "line one\nline two" becomes "line one line two" instead of fusing words.
_CONTROL_RE = re.compile(r'[\x00-\x08\x0E-\x1F\x7F-\x9F]')

_WHITESPACE_RE = re.compile(r'\s+')
_MULTI_DOT_RE = re.compile(r'\.{3,}')
_MULTI_BANG_RE = re.compile(r'!{2,}')
_MULTI_QUESTION_RE = re.compile(r'\?{2,}')
_SENTENCE_GAP_RE = re.compile(r'([.!?])\s*([A-Z])')

# Query parameters that ask the API for the audio format the caller requested.
# Formats not listed here are sent without explicit encoding parameters.
_FORMAT_QUERY_PARAMS = {
    "mp3": {"encoding": "mp3"},
    "wav": {"encoding": "linear16", "container": "wav"},
    "flac": {"encoding": "flac"},
    "opus": {"encoding": "opus"},
    "aac": {"encoding": "aac"},
}

# MIME types that differ from the generic "audio/<format>" form.
_MIME_TYPES = {
    "mp3": "audio/mpeg",
    "wav": "audio/wav",
}


class TTSService:
    """Service for handling text-to-speech conversion using Deepgram API"""

    def __init__(self):
        """
        Read the API key from the environment and set request defaults.

        Raises:
            ValueError: If the API key environment variable is not set
        """
        # NOTE(review): the variable name looks like a template placeholder;
        # kept as-is because deployments may already rely on it.
        self.api_key = os.getenv("YOUR_DEEPGRAM_API_KEY")
        self.base_url = "https://api.deepgram.com/v1/speak"
        self.default_model = "aura-2-thalia-en"

        if not self.api_key:
            logger.error("Deepgram API key not found in environment variables")
            raise ValueError("Deepgram API key is required")

    def clean_text_for_speech(self, text: str) -> str:
        """
        Clean text for speech synthesis by removing problematic characters

        Strips markdown markers, emojis, zero-width and control characters,
        maps typographic punctuation to ASCII, collapses whitespace and
        repeated punctuation, and ensures a space after sentence endings.

        Args:
            text (str): The text to clean

        Returns:
            str: Cleaned text suitable for speech synthesis; "" when the
                input is empty or not a string
        """
        if not text or not isinstance(text, str):
            return ""

        # Remove markdown formatting
        for pattern, replacement in _MARKDOWN_RULES:
            text = pattern.sub(replacement, text)

        # Remove emojis and special unicode characters
        text = _EMOJI_RE.sub('', text)

        # Normalize smart quotes, dashes, ellipsis and angle quotes
        text = text.translate(_PUNCTUATION_TABLE)

        # Remove zero-width and non-whitespace control characters
        text = _ZERO_WIDTH_RE.sub('', text)
        text = _CONTROL_RE.sub('', text)

        # Clean up extra whitespace (newlines and tabs become single spaces)
        text = _WHITESPACE_RE.sub(' ', text).strip()

        # Remove multiple consecutive punctuation
        text = _MULTI_DOT_RE.sub('...', text)
        text = _MULTI_BANG_RE.sub('!', text)
        text = _MULTI_QUESTION_RE.sub('?', text)

        # Ensure proper sentence endings: exactly one space before the
        # capital letter that follows ., ! or ?
        text = _SENTENCE_GAP_RE.sub(r'\1 \2', text)

        return text

    async def text_to_speech(
        self,
        text: str,
        model: Optional[str] = None,
        format: str = "mp3"
    ) -> Optional[dict]:
        """
        Convert text to speech using Deepgram API

        Args:
            text (str): The text to convert to speech
            model (str): The TTS model to use (default: aura-2-thalia-en)
            format (str): Audio format (default: mp3)

        Returns:
            dict: Contains base64 audio data and metadata (audio_data,
                mime_type, format, text, model, size_bytes), or None if the
                input is empty, the request fails, or the API returns a
                non-200 status
        """
        try:
            if not text or not text.strip():
                logger.warning("Empty text provided for TTS conversion")
                return None

            # Clean and prepare text
            cleaned_text = self.clean_text_for_speech(text)

            if not cleaned_text or not cleaned_text.strip():
                logger.warning("Text became empty after cleaning for TTS")
                return None

            if len(cleaned_text) > _MAX_TTS_CHARS:
                # Reserve room for the suffix so the result stays within
                # the limit instead of overshooting it by three characters.
                keep = _MAX_TTS_CHARS - len(_TRUNCATION_SUFFIX)
                cleaned_text = cleaned_text[:keep].rstrip() + _TRUNCATION_SUFFIX
                logger.warning("Text truncated to 2000 characters for TTS")

            # Prepare request
            selected_model = model or self.default_model
            querystring = {"model": selected_model}
            # Ask for the requested format so the returned bytes match the
            # MIME type reported below.
            querystring.update(_FORMAT_QUERY_PARAMS.get(format, {}))

            payload = {"text": cleaned_text}
            headers = {
                "Authorization": f"Token {self.api_key}",
                "Content-Type": "application/json"
            }

            logger.info(f"Converting text to speech: '{cleaned_text[:100]}...' (original: '{text[:50]}...')")

            # requests is blocking; run it in the default executor so the
            # event loop keeps serving other tasks while waiting.
            send_request = functools.partial(
                requests.post,
                self.base_url,
                json=payload,
                headers=headers,
                params=querystring,
                timeout=30
            )
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, send_request)

            if response.status_code != 200:
                logger.error(f"Deepgram TTS API error: {response.status_code} - {response.text}")
                return None

            # Encode audio data as base64
            audio_data = response.content
            audio_base64 = base64.b64encode(audio_data).decode('utf-8')

            # Determine MIME type based on format
            mime_type = _MIME_TYPES.get(format, f"audio/{format}")

            result = {
                "audio_data": audio_base64,
                "mime_type": mime_type,
                "format": format,
                "text": cleaned_text,
                "model": selected_model,
                "size_bytes": len(audio_data)
            }

            logger.info(f"TTS conversion successful: {len(audio_data)} bytes")
            return result

        except requests.exceptions.Timeout:
            logger.error("TTS request timed out")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"TTS request failed: {str(e)}")
            return None
        except Exception as e:
            # Boundary handler: callers receive None rather than an exception.
            logger.error(f"Unexpected error in TTS conversion: {str(e)}")
            return None

    def is_available(self) -> bool:
        """Check if TTS service is available"""
        return bool(self.api_key)


# Global TTS service instance
tts_service = TTSService()