File size: 4,032 Bytes
7f15e1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
Text-to-Speech (TTS) Service using Deepgram API
"""

import asyncio
import base64
import os
from typing import Optional

import requests

from src.utils.logger import logger

class TTSService:
    """Text-to-speech client for the Deepgram ``/v1/speak`` REST endpoint.

    The API key is read from the ``YOUR_DEEPGRAM_API_KEY`` environment
    variable when the service is constructed; construction fails with
    ``ValueError`` if the variable is unset or empty.
    """

    # Upper bound on the number of characters sent in one request,
    # including the truncation suffix.
    # NOTE(review): presumably mirrors Deepgram's per-request character
    # limit -- confirm against the API documentation.
    MAX_TEXT_LENGTH = 2000
    # Marker appended to text that had to be shortened.
    TRUNCATION_SUFFIX = "..."
    # Seconds to wait for the HTTP request before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read the API key from the environment and set endpoint defaults.

        Raises:
            ValueError: If ``YOUR_DEEPGRAM_API_KEY`` is unset or empty.
        """
        self.api_key = os.getenv("YOUR_DEEPGRAM_API_KEY")
        self.base_url = "https://api.deepgram.com/v1/speak"
        self.default_model = "aura-2-thalia-en"

        if not self.api_key:
            logger.error("Deepgram API key not found in environment variables")
            raise ValueError("Deepgram API key is required")

    @classmethod
    def _prepare_text(cls, text: str) -> tuple:
        """Strip *text* and cap it at ``MAX_TEXT_LENGTH`` characters.

        Returns:
            tuple: ``(prepared_text, was_truncated)``. When truncation
            happens the result ends with ``TRUNCATION_SUFFIX`` and its total
            length is exactly ``MAX_TEXT_LENGTH``.
        """
        stripped = text.strip()
        if len(stripped) <= cls.MAX_TEXT_LENGTH:
            return stripped, False
        # Reserve room for the suffix so the final string stays within the
        # limit (slicing to the full limit and then appending overshoots it).
        keep = cls.MAX_TEXT_LENGTH - len(cls.TRUNCATION_SUFFIX)
        return stripped[:keep] + cls.TRUNCATION_SUFFIX, True

    @staticmethod
    def _mime_type_for(audio_format: str) -> str:
        """Return the MIME type reported for *audio_format*."""
        known = {"mp3": "audio/mpeg", "wav": "audio/wav"}
        return known.get(audio_format, f"audio/{audio_format}")

    @staticmethod
    def _encoding_params(audio_format: str) -> dict:
        """Return the extra query parameters that request *audio_format*.

        ``"mp3"`` adds nothing, so the default request is unchanged.
        NOTE(review): the mapping assumes mp3 is the endpoint's default
        output, that WAV is requested as ``encoding=linear16`` with
        ``container=wav``, and that other format names are valid
        ``encoding`` values -- confirm against the Deepgram API reference.
        """
        if audio_format == "mp3":
            return {}
        if audio_format == "wav":
            return {"encoding": "linear16", "container": "wav"}
        return {"encoding": audio_format}

    def _post_speak(self, payload: dict, params: dict):
        """Send the blocking HTTP POST to the speak endpoint."""
        headers = {
            "Authorization": f"Token {self.api_key}",
            "Content-Type": "application/json"
        }
        return requests.post(
            self.base_url,
            json=payload,
            headers=headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT
        )

    async def text_to_speech(
        self,
        text: str,
        model: Optional[str] = None,
        format: str = "mp3"
    ) -> Optional[dict]:
        """
        Convert text to speech using Deepgram API

        The HTTP request runs in the event loop's default executor so that
        awaiting this coroutine does not block other tasks.

        Args:
            text (str): The text to convert to speech. Surrounding whitespace
                is stripped and text longer than ``MAX_TEXT_LENGTH`` is
                truncated (ending in ``...``).
            model (str): The TTS model to use (default: aura-2-thalia-en)
            format (str): Audio format (default: mp3). Formats other than
                mp3 are forwarded to the API as encoding parameters.

        Returns:
            dict: On success, a dict with keys ``audio_data`` (base64 string),
            ``mime_type``, ``format``, ``text`` (the text actually sent),
            ``model`` and ``size_bytes``. ``None`` if the text is empty, the
            API responds with a non-200 status, or any error occurs (errors
            are logged, not raised).
        """
        try:
            if not text or not text.strip():
                logger.warning("Empty text provided for TTS conversion")
                return None

            cleaned_text, truncated = self._prepare_text(text)
            if truncated:
                logger.warning(
                    f"Text truncated to {self.MAX_TEXT_LENGTH} characters for TTS"
                )

            selected_model = model or self.default_model
            querystring = {"model": selected_model}
            querystring.update(self._encoding_params(format))
            payload = {"text": cleaned_text}

            logger.info(f"Converting text to speech: {cleaned_text[:100]}...")

            # requests is synchronous; hand it to a worker thread so the
            # event loop stays responsive while waiting on the network.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, self._post_speak, payload, querystring
            )

            if response.status_code != 200:
                logger.error(f"Deepgram TTS API error: {response.status_code} - {response.text}")
                return None

            # Encode audio data as base64
            audio_data = response.content
            audio_base64 = base64.b64encode(audio_data).decode('utf-8')

            result = {
                "audio_data": audio_base64,
                "mime_type": self._mime_type_for(format),
                "format": format,
                "text": cleaned_text,
                "model": selected_model,
                "size_bytes": len(audio_data)
            }

            logger.info(f"TTS conversion successful: {len(audio_data)} bytes")
            return result

        except requests.exceptions.Timeout:
            logger.error("TTS request timed out")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"TTS request failed: {str(e)}")
            return None
        except Exception as e:
            # Best-effort boundary: callers expect None rather than an
            # exception for any failure.
            logger.error(f"Unexpected error in TTS conversion: {str(e)}")
            return None

    def is_available(self) -> bool:
        """Check if TTS service is available (i.e. an API key is set)"""
        return bool(self.api_key)

# Global TTS service instance, created when this module is imported.
# NOTE: TTSService() raises ValueError when the YOUR_DEEPGRAM_API_KEY
# environment variable is unset or empty, so importing this module fails
# in that case.
tts_service = TTSService()