import gradio as gr

import torch
import json
import random
import numpy as np
import base64
import spaces
from diffrhythm2.utils import (
    prepare_model,
    parse_lyrics,
    get_audio_prompt,
    get_text_prompt,
    inference,
    inference_stream
)

# Upper bound for the seed slider and for randomly drawn seeds (largest int32).
MAX_SEED = np.iinfo(np.int32).max

# Placement and precision handed to model loading and reused during inference.
device = "cuda"
dtype = torch.float16

# Placeholder binding; overwritten by the tokenizer that prepare_model returns.
lrc_tokenizer = None

# Load everything once at import time so every request reuses the same objects.
(
    diffrhythm2,
    mulan,
    lrc_tokenizer,
    decoder,
) = prepare_model("ASLP-Lab/DiffRhythm2", device, dtype)

@spaces.GPU
def infer_music(
        lrc,
        current_prompt_type,
        audio_prompt=None,
        text_prompt=None,
        seed=42,
        randomize_seed=False,
        steps=16,
        cfg_strength=1.0,
        file_type='wav',
        odeint_method='euler',
        device='cuda'
    ):
    """Generate a song from lyrics and a style prompt.

    Args:
        lrc: Raw lyrics text; tokenized with the module-level ``lrc_tokenizer``.
        current_prompt_type: ``"audio"`` selects ``audio_prompt``; any other
            value selects ``text_prompt``.
        audio_prompt: Reference audio handed to ``get_audio_prompt`` (the UI
            supplies a file path). Required when the prompt type is "audio".
        text_prompt: Style description handed to ``get_text_prompt``.
        seed: Seed for ``torch.manual_seed``; ignored when ``randomize_seed``.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        steps: Forwarded to ``inference`` as ``sample_steps``.
        cfg_strength: Forwarded to ``inference`` as ``cfg_strength``.
        file_type: Forwarded to ``inference`` as ``file_type``.
        odeint_method: Forwarded to ``inference`` as ``odeint_method``.
        device: Device for the lyric token tensor and the style-prompt helpers.

    Returns:
        Whatever ``inference`` returns for the requested ``file_type``.

    Raises:
        gr.Error: If the audio prompt is missing, or if lyric parsing or
            style-prompt extraction fails.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver the value as a float; torch.manual_seed needs an int.
    seed = int(seed)
    torch.manual_seed(seed)
    print(seed, current_prompt_type)

    # Fail early with a readable message instead of an opaque loader error.
    if current_prompt_type == "audio" and not audio_prompt:
        raise gr.Error("Error: please upload an audio prompt or switch to the text prompt.")

    try:
        lrc_prompt = parse_lyrics(lrc_tokenizer, lrc)
        # Concatenate the per-entry token lists into one flat id sequence.
        lrc_prompt = torch.tensor(sum(lrc_prompt, []), dtype=torch.long, device=device)
        if current_prompt_type == "audio":
            style_prompt = get_audio_prompt(mulan, audio_prompt, device, dtype)
        else:
            style_prompt = get_text_prompt(mulan, text_prompt, device, dtype)
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the original
        # exception chained for the server log.
        raise gr.Error(f"Error: {str(e)}") from e

    # Match the precision the model was loaded with.
    style_prompt = style_prompt.to(dtype)
    generate_song = inference(
        model=diffrhythm2,
        decoder=decoder,
        text=lrc_prompt,
        style_prompt=style_prompt,
        sample_steps=steps,
        cfg_strength=cfg_strength,
        odeint_method=odeint_method,
        duration=240,  # fixed generation length; presumably seconds — TODO confirm
        file_type=file_type
    )
    return generate_song
    # NOTE: streaming variant kept for reference; currently disabled.
    # for block in inference_stream(
    #     model=diffrhythm2, 
    #     decoder=decoder, 
    #     text=lrc_prompt, 
    #     style_prompt=style_prompt,
    #     sample_steps=steps,
    #     cfg_strength=cfg_strength,
    #     odeint_method=odeint_method,
    #     duration=240,
    #     file_type=file_type
    # ):
    #     yield block


# Custom stylesheet passed to gr.Blocks(css=...).
# - .lyrics-scroll-box: fixes the lyrics textarea height and forces a vertical
#   scrollbar (applied through elem_classes on the lyrics Textbox).
# - .gr-examples: transparent, bordered container styling for example galleries.
css = """
/* 固定文本域高度并强制滚动条 */
.lyrics-scroll-box textarea {
    height: 405px !important;  /* 固定高度 */
    max-height: 500px !important;  /* 最大高度 */
    overflow-y: auto !important;  /* 垂直滚动 */
    white-space: pre-wrap;  /* 保留换行 */
    line-height: 1.5;  /* 行高优化 */
}

.gr-examples {
    background: transparent !important;
    border: 1px solid #e0e0e0 !important;
    border-radius: 8px;
    margin: 1rem 0 !important;
    padding: 1rem !important;
}

"""
import base64

def image_to_base64(path):
    """Read the file at *path* and return its bytes as a base64 text string."""
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Build the Gradio UI: header, lyrics/prompt inputs, generation controls, and
# the click handler that feeds everything into infer_music.
with gr.Blocks(css=css) as demo:
    # Page header: title plus badge links to the paper, repository and project page.
    gr.HTML(f"""
            <div style="flex: 1; text-align: center;">
                <div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
                    Di♪♪Rhythm 2 (谛韵)
                </div>
                <div style="display:flex; justify-content: center; column-gap:4px;">
                    <a href="https://arxiv.org/pdf/2510.22950">
                        <img src='https://img.shields.io/badge/Arxiv-Paper-blue'>
                    </a> 
                    <a href="https://github.com/ASLP-lab/DiffRhythm2">
                        <img src='https://img.shields.io/badge/GitHub-Repo-green'>
                    </a> 
                    <a href="https://aslp-lab.github.io/DiffRhythm2.github.io/">
                        <img src='https://img.shields.io/badge/Project-Page-brown'>
                    </a>
                </div>
            </div> 
            """)

    with gr.Tabs() as tabs:

        # page 1
        with gr.Tab("Music Generate", id=0):
            with gr.Row():
                # Left column: lyrics and style prompt inputs.
                with gr.Column():
                    lrc = gr.Textbox(
                        label="Lyrics",
                        placeholder="Input the full lyrics",
                        lines=12,
                        max_lines=50,
                        elem_classes="lyrics-scroll-box",
                        value="""[start]
[intro]
[verse]
Thought I heard your voice yesterday
When I turned around to say
That I loved you baby
I realize it was juss my mind
Played tricks on me
And it seems colder lately at night
And I try to sleep with the lights on
Every time the phone rings
I pray to God it's you
And I just can't believe
That we're through
[chorus]
I miss you
There's no other way to say it
And I can't deny it
I miss you
It's so easy to see
I miss you and me
[verse]
Is it turning over this time
Have we really changed our minds about each other's love
All the feelings that we used to share
I refuse to believe
That you don't care
[chorus]
I miss you
There's no other way to say it
And I and I can't deny it
I miss you
[verse]
It's so easy to see
I've got to gather myself as together
I've been through worst kinds of weather
If it's over now
[outro]"""
                    )
                    # Remembers which prompt tab is active so infer_music knows
                    # whether to use the text prompt or the audio prompt.
                    current_prompt_type = gr.State(value="text")
                    with gr.Tabs() as inside_tabs:
                        with gr.Tab("Text Prompt"):
                            text_prompt = gr.Textbox(
                                label="Text Prompt",
                                value="Pop, Piano, Bass, Drums, Happy",
                                placeholder="Enter the Text Prompt, eg: emotional piano pop",
                            )
                        with gr.Tab("Audio Prompt"):
                            audio_prompt = gr.Audio(label="Audio Prompt", type="filepath")

                        def update_prompt_type(evt: gr.SelectData):
                            """Map the selected inner tab (index 0 = text) to a prompt-type string."""
                            return "text" if evt.index == 0 else "audio"

                        inside_tabs.select(
                            fn=update_prompt_type,
                            outputs=current_prompt_type
                        )

                # Right column: guidance, generate button, output and settings.
                with gr.Column():

                    with gr.Accordion("Best Practices Guide", open=True):
                        gr.Markdown("""
                        1. **Lyrics Format Requirements**
                        - Each line must follow: `Lyric content`
                        - Example of valid format:
                            ``` 
                            [intro]
                            [verse]
                            Thought I heard your voice yesterday
                            When I turned around to say
                            ```

                        2. **Audio Prompt Requirements**
                        - Reference audio should be ≥ 1 second, Audio >10 seconds will be randomly clipped into 10 seconds
                        - For optimal results, the 10-second clips should be carefully selected
                        - Shorter clips may lead to incoherent generation
                        
                        3. **Supported Languages**
                        - Chinese and English

                        **Due to issues with Gradio's streaming audio output, we will update the streaming feature in the future. Please stay tuned!**
                        """)
                    lyrics_btn = gr.Button("Generate", variant="primary")
                    # audio_output = gr.Gallery(label="Audio Results")
                    audio_output = gr.Audio(label="Audio Result", elem_id="audio_output")
                    with gr.Accordion("Advanced Settings", open=False):
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=0,
                        )
                        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                        steps = gr.Slider(
                            minimum=10,
                            maximum=100,
                            value=16,
                            step=1,
                            label="Diffusion Steps",
                            interactive=True,
                            elem_id="step_slider"
                        )
                        cfg_strength = gr.Slider(
                            minimum=1,
                            maximum=10,
                            value=1.3,
                            # 0.1 keeps the 1.3 default on the slider grid; the
                            # previous 0.5 step could never land back on it.
                            step=0.1,
                            label="CFG Strength",
                            interactive=True,
                            # Distinct id: DOM ids must be unique and
                            # "step_slider" already belongs to the steps slider.
                            elem_id="cfg_slider"
                        )

                        odeint_method = gr.Radio(["euler", "midpoint", "rk4","implicit_adams"], label="ODE Solver", value="euler")
                        file_type = gr.Dropdown(["wav", "mp3", "ogg"], label="Output Format", value="mp3")


            # gr.Examples(
            #     examples=[
            #         ["src/prompt/classic_cn.wav"],
            #         ["src/prompt/classic_en.wav"],
            #         ["src/prompt/country_cn.wav"],
            #         ["src/prompt/country_en.wav"],
            #         ["src/prompt/jazz_cn.wav"],
            #         ["src/prompt/jazz_en.wav"],
            #         ["src/prompt/pop_cn.wav"],
            #         ["src/prompt/pop_en.wav"],
            #         ["src/prompt/rap_cn.wav"],
            #         ["src/prompt/rap_en.wav"],
            #         ["src/prompt/rock_cn.wav"],
            #         ["src/prompt/rock_en.wav"]
            #     ],
            #     inputs=[audio_prompt],  
            #     label="Audio Examples",
            #     examples_per_page=12,
            #     elem_id="audio-examples-container" 
            # )
            
            # gr.Examples(
            #     examples=[
            #         ["Pop Emotional Piano"],
            #         ["流行 情感 钢琴"],
            #         ["Indie folk ballad, coming-of-age themes, acoustic guitar picking with harmonica interludes"],
            #         ["独立民谣, 成长主题, 原声吉他弹奏与口琴间奏"]
            #     ],
            #     inputs=[text_prompt],  
            #     label="Text Examples",
            #     examples_per_page=4,
            #     elem_id="text-examples-container" 
            # )

            # gr.Examples(
            #     examples=[
            #          ["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
            #          ["""[00:05.00]Stardust whispers in your eyes\n[00:09.30]Moonlight paints our silhouettes\n[00:13.75]Tides bring secrets from the deep\n[00:18.20]Where forever's breath is kept\n[00:22.90]We dance through constellations' maze\n[00:27.15]Footprints melt in cosmic waves\n[00:31.65]Horizons hum our silent vow\n[00:36.10]Time unravels here and now\n[00:40.85]Eternal embers in the night oh oh oh\n[00:45.25]Healing scars with liquid light\n[00:49.70]Galaxies write our refrain\n[00:54.15]Love reborn in endless rain\n[01:15.30]Paper boats of memories\n[01:19.75]Float through veins of ancient trees\n[01:24.20]Your laughter spins aurora threads\n[01:28.65]Weaving dawn through featherbed"""],
            #          ["""[00:04.27]只因你太美 baby\n[00:08.95]只因你实在是太美 baby\n[00:13.99]只因你太美 baby\n[00:18.89]迎面走来的你让我如此蠢蠢欲动\n[00:20.88]这种感觉我从未有\n[00:21.79]Cause I got a crush on you who you\n[00:25.74]你是我的我是你的谁\n[00:28.09]再多一眼看一眼就会爆炸\n[00:30.31]再近一点靠近点快被融化\n[00:32.49]想要把你占为己有 baby\n[00:34.60]不管走到哪里\n[00:35.44]都会想起的人是你 you you\n[00:38.12]我应该拿你怎样\n[00:39.61]Uh 所有人都在看着你\n[00:42.36]我的心总是不安\n[00:44.18]Oh 我现在已病入膏肓\n[00:46.63]Eh oh\n[00:47.84]难道真的因你而疯狂吗\n[00:51.57]我本来不是这种人\n[00:53.59]因你变成奇怪的人\n[00:55.77]第一次呀变成这样的我\n[01:01.23]不管我怎么去否认\n[01:03.21]只因你太美 baby\n[01:11.46]只因你实在是太美 baby\n[01:16.75]只因你太美 baby\n[01:21.09]Oh eh oh\n[01:22.82]现在确认地告诉我\n[01:25.26]Oh eh oh\n[01:27.31]你到底属于谁\n[01:29.98]Oh eh oh\n[01:31.70]现在确认地告诉我\n[01:34.45]Oh eh oh\n[01:36.35]你到底属于谁\n[01:37.65]就是现在告诉我\n[01:40.00]跟着那节奏 缓缓 make wave\n"""],
            #          ["""[00:16.55]倦鸟西归 竹影余晖\n[00:23.58]禅意心扉\n[00:27.32]待清风 拂开一池春水\n[00:30.83]你的手绘 玉色难褪\n[00:37.99]我端详飘散的韵味\n[00:40.65]落款壶底的名讳\n[00:42.92]如吻西施的嘴\n[00:45.14]风雅几回 总相随\n[00:52.32]皆因你珍贵\n[00:57.85]三千弱水 煮一杯\n[01:02.21]我只饮下你的美\n[01:04.92]千年余味 紫砂壶伴我醉\n[01:09.73]酿一世无悔\n[01:12.09]沏壶春水 翠烟飞\n[01:16.62]把盏不尽你的香味\n[01:20.06]邀月相对 愿今生同宿同归\n[01:26.43]只让你陪\n[01:46.12]茗香芳菲 世俗无追\n"""]
            #     ],
            #     inputs=[lrc],
            #     label="Lrc Examples",
            #     examples_per_page=4,
            #     elem_id="lrc-examples-container",
            # )

    # No-op handler for outer tab changes. It takes no arguments because no
    # inputs are wired; a one-argument callback would fail when the event fires.
    tabs.select(
        lambda: None,
        None,
        None
    )

    # TODO add max_frames parameter for infer_music
    # Input order must match infer_music's positional parameters.
    lyrics_btn.click(
        fn=infer_music,
        inputs=[
            lrc,
            current_prompt_type,
            audio_prompt,
            text_prompt,
            seed,
            randomize_seed,
            steps,
            cfg_strength,
            file_type,
            odeint_method,
        ],
        outputs=audio_output,
    )


# demo.queue().launch(show_api=False, show_error=True)


# Start the Gradio server only when this file is executed as a script, so
# importing the module (e.g. to reuse `demo`) does not launch it.
if __name__ == "__main__":
    demo.launch()