Spaces:
Sleeping
Sleeping
File size: 4,032 Bytes
7f15e1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
"""
Text-to-Speech (TTS) Service using Deepgram API
"""
import asyncio
import base64
import functools
import os
from typing import Optional

import requests

from src.utils.logger import logger
class TTSService:
    """Service for handling text-to-speech conversion using Deepgram API.

    The API key is read from the ``YOUR_DEEPGRAM_API_KEY`` environment
    variable when the service is constructed.
    """

    # Upper bound on the number of characters sent in a single request,
    # including the truncation suffix.
    MAX_TEXT_LENGTH = 2000
    # Marker appended to text that had to be shortened.
    TRUNCATION_SUFFIX = "..."
    # Seconds to wait for the HTTP request before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read the API key from the environment and set request defaults.

        Raises:
            ValueError: If the API key environment variable is unset or empty.
        """
        self.api_key = os.getenv("YOUR_DEEPGRAM_API_KEY")
        self.base_url = "https://api.deepgram.com/v1/speak"
        self.default_model = "aura-2-thalia-en"
        if not self.api_key:
            logger.error("Deepgram API key not found in environment variables")
            raise ValueError("Deepgram API key is required")

    @classmethod
    def _truncate_text(cls, text: str) -> str:
        """Return *text* shortened to at most ``MAX_TEXT_LENGTH`` characters.

        The suffix is counted against the limit, so the result never exceeds
        it (the previous ``text[:2000] + "..."`` produced 2003 characters).
        """
        if len(text) <= cls.MAX_TEXT_LENGTH:
            return text
        keep = cls.MAX_TEXT_LENGTH - len(cls.TRUNCATION_SUFFIX)
        return text[:keep] + cls.TRUNCATION_SUFFIX

    @staticmethod
    def _mime_type_for(audio_format: str) -> str:
        """Return the MIME type reported to callers for *audio_format*."""
        if audio_format == "mp3":
            return "audio/mpeg"
        return f"audio/{audio_format}"

    @staticmethod
    def _encoding_params(audio_format: str) -> dict:
        """Return the query parameters that request *audio_format* output.

        NOTE(review): the ``encoding`` / ``container`` parameter names and
        the ``linear16`` value follow Deepgram's speak API as understood at
        review time -- confirm against the current API reference.
        """
        if audio_format == "mp3":
            # Keep the default request identical to what was sent before.
            return {}
        if audio_format == "wav":
            return {"encoding": "linear16", "container": "wav"}
        return {"encoding": audio_format}

    async def text_to_speech(
        self,
        text: str,
        model: Optional[str] = None,
        format: str = "mp3"
    ) -> Optional[dict]:
        """
        Convert text to speech using Deepgram API

        The blocking HTTP call runs in the event loop's default executor so
        other coroutines are not stalled while the request is in flight.

        Args:
            text (str): The text to convert to speech. Surrounding whitespace
                is stripped and text longer than ``MAX_TEXT_LENGTH`` is
                truncated.
            model (str): The TTS model to use (default: aura-2-thalia-en)
            format (str): Audio format (default: mp3). Non-mp3 formats are
                requested from the API explicitly so the returned bytes
                match the reported MIME type.

        Returns:
            dict: ``audio_data`` (base64 string), ``mime_type``, ``format``,
            ``text`` (the text actually sent), ``model`` and ``size_bytes``;
            or None if the text is empty or the request failed.
        """
        try:
            if not text or not text.strip():
                logger.warning("Empty text provided for TTS conversion")
                return None

            # Clean and prepare text
            stripped = text.strip()
            cleaned_text = self._truncate_text(stripped)
            if len(stripped) > self.MAX_TEXT_LENGTH:
                logger.warning(
                    f"Text truncated to {self.MAX_TEXT_LENGTH} characters for TTS"
                )

            # Prepare request
            selected_model = model or self.default_model
            querystring = {"model": selected_model}
            querystring.update(self._encoding_params(format))
            payload = {"text": cleaned_text}
            headers = {
                "Authorization": f"Token {self.api_key}",
                "Content-Type": "application/json"
            }
            logger.info(f"Converting text to speech: {cleaned_text[:100]}...")

            # requests is synchronous; hand it to a worker thread instead of
            # blocking the event loop for up to REQUEST_TIMEOUT seconds.
            send_request = functools.partial(
                requests.post,
                self.base_url,
                json=payload,
                headers=headers,
                params=querystring,
                timeout=self.REQUEST_TIMEOUT,
            )
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, send_request)

            if response.status_code != 200:
                logger.error(f"Deepgram TTS API error: {response.status_code} - {response.text}")
                return None

            # Encode audio data as base64
            audio_data = response.content
            audio_base64 = base64.b64encode(audio_data).decode('utf-8')
            result = {
                "audio_data": audio_base64,
                "mime_type": self._mime_type_for(format),
                "format": format,
                "text": cleaned_text,
                "model": selected_model,
                "size_bytes": len(audio_data)
            }
            logger.info(f"TTS conversion successful: {len(audio_data)} bytes")
            return result
        except requests.exceptions.Timeout:
            logger.error("TTS request timed out")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"TTS request failed: {str(e)}")
            return None
        except Exception as e:
            # Best-effort API: any other failure is logged and reported as None.
            logger.error(f"Unexpected error in TTS conversion: {str(e)}")
            return None

    def is_available(self) -> bool:
        """Check if TTS service is available (i.e. an API key is set)."""
        return bool(self.api_key)
# Global TTS service instance
# NOTE: constructed at import time, so importing this module raises
# ValueError when the Deepgram API key is not set in the environment.
tts_service = TTSService()