import pickle
import numpy as np
from tqdm import tqdm
import edge_tts
import ast
import asyncio


class EdgeTTSGenerator:
    """
    A class to generate podcast-style audio from a transcript using edge-tts.
    """
    def __init__(self, transcript_file_path, output_audio_path):
        """
        Initialize the TTS generator with the path to the rewritten transcript file.

        Args:
            transcript_file_path (str): Path to the file containing the rewritten transcript.
            output_audio_path (str): Path to save the generated audio file.
        """
        self.transcript_file_path = transcript_file_path
        self.output_audio_path = output_audio_path

        # edge-tts neural voices assigned to the two podcast speakers
        self.speaker1_voice = "en-US-AriaNeural"
        self.speaker2_voice = "en-US-GuyNeural"

    def load_transcript(self):
        """
        Loads the rewritten transcript from the specified file.

        Returns:
            list: The content of the transcript as a list of tuples (speaker, text).
        """
        # The pickle stores the transcript as a string representation of a Python
        # list, so literal_eval converts it back into (speaker, text) tuples.
        with open(self.transcript_file_path, 'rb') as f:
            return ast.literal_eval(pickle.load(f))
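
    # Illustrative shape of the loaded transcript (an assumption about the
    # upstream rewriting step, not guaranteed by this file alone):
    #   [("Speaker 1", "Welcome to the show..."),
    #    ("Speaker 2", "Thanks, glad to be here!")]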

    async def generate_audio_segment(self, text, voice_name):
        """
        Generate audio for a given text using edge-tts.

        Args:
            text (str): Text to be synthesized.
            voice_name (str): The voice name to use for TTS.

        Returns:
            bytes: Raw audio data for the synthesized text.
        """
        communicator = edge_tts.Communicate(text, voice_name)
        audio_bytes = b""
        async for chunk in communicator.stream():
            if chunk["type"] == "audio":  # Keep audio chunks, skip metadata
                audio_bytes += chunk["data"]
        return audio_bytes
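
    # For reference: communicator.stream() yields dict chunks. Audio chunks look
    # like {"type": "audio", "data": b"..."}, while "WordBoundary" chunks carry
    # timing metadata only and are intentionally skipped above.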

    def save_audio(self, audio_data):
        """
        Save the combined audio data to an output file.

        Args:
            audio_data (list): List of bytes containing the audio data for each segment.
        """
        # edge-tts streams MP3 frames by default, so the per-segment byte strings
        # can be concatenated directly into a single playable file.
        combined_audio = b"".join(audio_data)
        with open(self.output_audio_path, "wb") as f:
            f.write(combined_audio)

    async def generate_audio(self):
        """
        Converts the transcript into audio and saves it to a file.

        Returns:
            str: Path to the saved audio file.
        """
        transcript = self.load_transcript()
        audio_data = []
        for speaker, text in tqdm(transcript, desc="Generating podcast segments", unit="segment"):
            voice = self.speaker1_voice if speaker == "Speaker 1" else self.speaker2_voice
            segment_audio = await self.generate_audio_segment(text, voice)
            audio_data.append(segment_audio)
        self.save_audio(audio_data)
        return self.output_audio_path
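

# Minimal usage sketch (not part of the original file; the paths below are
# hypothetical placeholders for a transcript pickle and an output MP3):
if __name__ == "__main__":
    generator = EdgeTTSGenerator(
        transcript_file_path="./resources/podcast_ready_data.pkl",  # hypothetical path
        output_audio_path="./resources/podcast.mp3",                # hypothetical path
    )
    # generate_audio is a coroutine, so drive it with an event loop.
    audio_path = asyncio.run(generator.generate_audio())
    print(f"Podcast audio saved to {audio_path}")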