Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import outetts | |
| import torch | |
| import spaces | |
# Speaker names offered in the UI, spelled "<language>_<gender>_<variant>".
# Generated in language -> gender -> variant order.
AVAILABLE_SPEAKERS = [
    f"{language}_{gender}_{variant}"
    for language in ("en", "zh", "jp", "kr")
    for gender in ("male", "female")
    for variant in (1, 2)
]
def get_available_speakers():
    """Return the module-level list of selectable speaker names.

    The list object itself is returned (not a copy).
    """
    speakers = AVAILABLE_SPEAKERS
    return speakers
# Process-wide OuteTTS interface. Built lazily on the first request so the
# model weights are loaded once instead of on every button click.
_TTS_INTERFACE = None


def _get_interface():
    """Return the shared OuteTTS interface, constructing it on first use."""
    global _TTS_INTERFACE
    if _TTS_INTERFACE is None:
        model_config = outetts.HFModelConfig_v2(
            model_path="OuteAI/OuteTTS-0.3-1B",
            tokenizer_path="OuteAI/OuteTTS-0.3-1B",
            dtype=torch.bfloat16,
            device="cuda",
        )
        _TTS_INTERFACE = outetts.InterfaceHF(model_version="0.3", cfg=model_config)
    return _TTS_INTERFACE


def generate_tts(text, temperature, repetition_penalty, speaker_selection, reference_audio):
    """Synthesize speech for *text* and return ``(wav_path, error_message)``.

    Args:
        text: Text to speak. Empty or whitespace-only input is rejected
            before the model is touched.
        temperature: Passed through to ``outetts.GenerationConfig``.
        repetition_penalty: Passed through to ``outetts.GenerationConfig``.
        speaker_selection: Name of a default speaker to load. Ignored when
            falsy, when equal to the string ``"None"``, or when
            ``reference_audio`` is given.
        reference_audio: Optional path to an audio file; when truthy a
            speaker is created from it and it takes priority over
            ``speaker_selection``.

    Returns:
        ``(path, None)`` on success, where ``path`` is a freshly created
        ``.wav`` file, or ``(None, message)`` on failure. Failures are
        reported through the second element rather than raised, so the UI
        can show them in its status box.
    """
    # NOTE(review): `spaces` is imported at file level but this handler is not
    # wrapped in `spaces.GPU`; if the app runs on ZeroGPU hardware it
    # presumably needs that decorator -- confirm against the deployment.
    import tempfile

    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    try:
        # Inside the try so that model-load failures (e.g. no CUDA device)
        # are reported the same way as generation failures.
        interface = _get_interface()

        if reference_audio:
            speaker = interface.create_speaker(reference_audio)
        elif speaker_selection and speaker_selection != "None":
            speaker = interface.load_default_speaker(speaker_selection)
        else:
            speaker = None

        gen_cfg = outetts.GenerationConfig(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=4096,
            speaker=speaker,
        )
        output = interface.generate(config=gen_cfg)
        if output.audio is None:
            raise ValueError("Audio generation failed. Please try again.")

        # A unique file per request: a fixed "output.wav" would be
        # overwritten by any other request being served at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        output.save(output_path)
        return output_path, None
    except Exception as e:
        # Some exceptions stringify to ""; fall back to the class name so the
        # caller always receives a non-empty message on failure.
        return None, str(e) or type(e).__name__
# Custom CSS for 3D effect and modern UI.
# Each rule targets a class that the layout attaches through `elem_classes`:
# "container", "title", "radio-group", "control-panel" and "generate-button".
custom_css = """
.container {
    background: linear-gradient(145deg, #f0f0f0, #ffffff);
    border-radius: 20px;
    box-shadow: 20px 20px 60px #bebebe, -20px -20px 60px #ffffff;
    padding: 2rem;
}
.title {
    font-size: 2.5rem;
    text-align: center;
    background: linear-gradient(45deg, #2196F3, #00BCD4);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 2rem;
}
.radio-group {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
    gap: 1rem;
    margin: 1rem 0;
}
.control-panel {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 15px;
    padding: 1.5rem;
    margin: 1rem 0;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}
.generate-button {
    background: linear-gradient(45deg, #2196F3, #00BCD4);
    color: white;
    border: none;
    padding: 1rem 2rem;
    border-radius: 8px;
    cursor: pointer;
    transition: transform 0.2s;
}
.generate-button:hover {
    transform: translateY(-2px);
}
"""
# Build the UI: a two-column page with the inputs on the left and the
# generated audio plus a status box on the right.
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation -- in particular, confirm that `reference_audio` and
# `submit_button` belong inside the left-hand control-panel group.
with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("# Voice Clone Multilingual TTS", elem_classes="title")
        with gr.Row():
            with gr.Column(scale=2):
                # Main input section with 3D effect
                with gr.Group(elem_classes="control-panel"):
                    text_input = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type your text here...",
                        lines=3
                    )
                    speaker_radio = gr.Radio(
                        choices=get_available_speakers(),
                        value="en_male_1",
                        label="Choose Voice",
                        elem_classes="radio-group"
                    )
                    with gr.Row():
                        # Shown as "Expression Level"; passed to the handler
                        # as its `temperature` argument.
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.1,
                            label="Expression Level"
                        )
                        # Shown as "Clarity"; passed to the handler as its
                        # `repetition_penalty` argument.
                        repetition_penalty = gr.Slider(
                            minimum=0.5,
                            maximum=2.0,
                            value=1.1,
                            label="Clarity"
                        )
                    # type="filepath": the handler receives a path string.
                    reference_audio = gr.Audio(
                        label="Upload Voice Reference",
                        type="filepath"
                    )
                    submit_button = gr.Button(
                        "Generate Speech",
                        variant="primary",
                        elem_classes="generate-button"
                    )
            with gr.Column(scale=1):
                # Output section with 3D effect
                with gr.Group(elem_classes="control-panel"):
                    audio_output = gr.Audio(
                        label="Generated Audio",
                        type="filepath"
                    )
                    # Hidden until a run produces an error message.
                    error_box = gr.Textbox(
                        label="Status",
                        visible=False
                    )
    # The input order here must match the positional parameters of
    # generate_tts; its two return values fill the audio player and the
    # status box respectively.
    submit_button.click(
        fn=generate_tts,
        inputs=[
            text_input,
            temperature,
            repetition_penalty,
            speaker_radio,
            reference_audio,
        ],
        outputs=[audio_output, error_box]
    ).then(
        # After the handler finishes, show the status box only when it holds
        # a non-empty value and hide it otherwise.
        fn=lambda x: gr.update(visible=bool(x)),
        inputs=[error_box],
        outputs=[error_box]
    )
demo.launch()