import pickle
import numpy as np
from tqdm import tqdm
import edge_tts
import ast
import asyncio


class EdgeTTSGenerator:
    """
    A class to generate podcast-style audio from a transcript using edge-tts.
    """
    def __init__(self, transcript_file_path, output_audio_path):
        """
        Initialize the TTS generator with the path to the rewritten transcript file.

        Args:
            transcript_file_path (str): Path to the file containing the rewritten transcript.
            output_audio_path (str): Path to save the generated audio file.
        """
        self.transcript_file_path = transcript_file_path
        self.output_audio_path = output_audio_path

        # edge-tts neural voices assigned to the two podcast speakers
        self.speaker1_voice = "en-US-AriaNeural"
        self.speaker2_voice = "en-US-GuyNeural"

    def load_transcript(self):
        """
        Loads the rewritten transcript from the specified file.

        Returns:
            list: The content of the transcript as a list of tuples (speaker, text).
        """
        # The pickle stores the transcript as a string representation of a Python
        # list, so literal_eval converts it back into (speaker, text) tuples.
        with open(self.transcript_file_path, 'rb') as f:
            return ast.literal_eval(pickle.load(f))
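
    # Illustrative shape of the loaded transcript (an assumption about the
    # upstream rewriting step, not guaranteed by this file alone):
    #   [("Speaker 1", "Welcome to the show..."),
    #    ("Speaker 2", "Thanks, glad to be here!")]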

    async def generate_audio_segment(self, text, voice_name):
        """
        Generate audio for a given text using edge-tts.

        Args:
            text (str): Text to be synthesized.
            voice_name (str): The voice name to use for TTS.

        Returns:
            bytes: Raw audio data for the synthesized text.
        """
        communicator = edge_tts.Communicate(text, voice_name)
        audio_bytes = b""
        async for chunk in communicator.stream():
            if chunk["type"] == "audio":  # Keep audio chunks, skip metadata
                audio_bytes += chunk["data"]
        return audio_bytes
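
    # For reference: communicator.stream() yields dict chunks. Audio chunks look
    # like {"type": "audio", "data": b"..."}, while "WordBoundary" chunks carry
    # timing metadata only and are intentionally skipped above.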

    def save_audio(self, audio_data):
        """
        Save the combined audio data to an output file.

        Args:
            audio_data (list): List of bytes containing the audio data for each segment.
        """
        # edge-tts streams MP3 frames by default, so the per-segment byte strings
        # can be concatenated directly into a single playable file.
        combined_audio = b"".join(audio_data)
        with open(self.output_audio_path, "wb") as f:
            f.write(combined_audio)

    async def generate_audio(self):
        """
        Converts the transcript into audio and saves it to a file.

        Returns:
            str: Path to the saved audio file.
        """
        transcript = self.load_transcript()
        audio_data = []
        for speaker, text in tqdm(transcript, desc="Generating podcast segments", unit="segment"):
            voice = self.speaker1_voice if speaker == "Speaker 1" else self.speaker2_voice
            segment_audio = await self.generate_audio_segment(text, voice)
            audio_data.append(segment_audio)
        self.save_audio(audio_data)
        return self.output_audio_path
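

# Minimal usage sketch (not part of the original file; the paths below are
# hypothetical placeholders for a transcript pickle and an output MP3):
if __name__ == "__main__":
    generator = EdgeTTSGenerator(
        transcript_file_path="./resources/podcast_ready_data.pkl",  # hypothetical path
        output_audio_path="./resources/podcast.mp3",                # hypothetical path
    )
    # generate_audio is a coroutine, so drive it with an event loop.
    audio_path = asyncio.run(generator.generate_audio())
    print(f"Podcast audio saved to {audio_path}")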