Spaces:

CAMB-AI
/

mars5_space

Runtime error

mars5_space / app.py

Update app.py

59df2b8 verified over 1 year ago

1.65 kB


	import gradio as gr
	import torch
	import librosa
	from pathlib import Path
	import tempfile, torchaudio


	# Pull the English MARS5 checkpoint through torch.hub. The call hands back
	# two objects: the model itself and the class used to build per-request
	# inference configs (instantiated later inside `synthesize`).
	mars5, config_class = torch.hub.load(
	    'Camb-ai/mars5-tts',
	    'mars5_english',
	    trust_repo=True,
	)

	# Default reference audio and transcript
	# default_audio_path = "example.wav"
	# default_transcript = "We actually haven't managed to meet demand."

	# Function to process the text and audio input and generate the synthesized output
	def synthesize(text, audio_file, transcript):
	    """Clone the voice from a reference recording and speak ``text`` with it.

	    Args:
	        text: Text to synthesize.
	        audio_file: Filesystem path of the reference recording (the Gradio
	            audio input is declared with ``type="filepath"``), or ``None``
	            when the user did not provide one.
	        transcript: Transcript of the reference recording; forwarded
	            unchanged to ``mars5.tts``.

	    Returns:
	        The path, as a ``str``, of a newly created temporary ``.wav`` file
	        holding the generated audio.

	    Raises:
	        gr.Error: If no reference audio was supplied.
	    """
	    # Without this guard a missing upload surfaces as an opaque failure deep
	    # inside librosa; gr.Error is shown to the user as a readable message.
	    if audio_file is None:
	        raise gr.Error("Please upload a reference audio file to clone from.")

	    # Load the reference audio as mono, resampled to the model's sample rate.
	    wav, _ = librosa.load(audio_file, sr=mars5.sr, mono=True)
	    wav = torch.from_numpy(wav)

	    # Define the configuration for the TTS model.
	    cfg = config_class(
	        deep_clone=True,
	        rep_penalty_window=100,
	        top_k=100,
	        temperature=0.7,
	        freq_penalty=3,
	    )

	    # Generate the synthesized audio; only the waveform is needed here.
	    _, wav_out = mars5.tts(text, wav, transcript, cfg=cfg)

	    # Reserve the output file atomically. tempfile.mktemp() only returns a
	    # name and is deprecated because another process can claim that name
	    # before it is opened. The handle is closed before torchaudio writes to
	    # the path, and delete=False keeps the file alive after this function
	    # returns so the caller can read it.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_path = tmp.name

	    # unsqueeze(0) adds a leading axis before saving (presumably turning a
	    # 1-D waveform into the (channels, samples) layout — TODO confirm).
	    torchaudio.save(output_path, wav_out.unsqueeze(0), mars5.sr)

	    return output_path

	# Assemble the web UI: target text, a reference clip (delivered to the
	# callback as a file path) and the clip's transcript go in; a single audio
	# player shows the result produced by `synthesize`.
	interface = gr.Interface(
	    fn=synthesize,
	    inputs=[
	        gr.Textbox(label="Text to synthesize"),
	        gr.Audio(label="Audio file to clone from", type="filepath"),
	        gr.Textbox(label="Uploaded audio file transcript"),
	    ],
	    outputs=gr.Audio(label="Synthesized Audio"),
	    title="MARS5 TTS Demo",
	    description="Enter text and upload an audio file to clone the voice and generate synthesized speech using MARS5 TTS.",
	)

	# Start serving the demo.
	interface.launch()