"""Gradio demo for DiffRhythm2 music generation.

Loads the DiffRhythm2 model once at import time, builds a Blocks UI that
collects lyrics plus a text or audio style prompt, and runs `infer_music`
when the "Generate" button is clicked.
"""

import base64
import json
import random

import gradio as gr
import numpy as np
import spaces
import torch

from diffrhythm2.utils import (
    prepare_model,
    parse_lyrics,
    get_audio_prompt,
    get_text_prompt,
    inference,
    inference_stream,
)

# Upper bound for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max

device = 'cuda'
dtype = torch.float16

# Loaded once at import so every request reuses the same model objects.
diffrhythm2, mulan, lrc_tokenizer, decoder = prepare_model(
    "ASLP-Lab/DiffRhythm2", device, dtype
)


@spaces.GPU
def infer_music(
    lrc,
    current_prompt_type,
    audio_prompt=None,
    text_prompt=None,
    seed=42,
    randomize_seed=False,
    steps=16,
    cfg_strength=1.0,
    file_type='wav',
    odeint_method='euler',
    device='cuda',
):
    """Generate a song from lyrics and a style prompt.

    Args:
        lrc: Lyrics text, tokenized with `parse_lyrics`.
        current_prompt_type: ``"audio"`` to build the style prompt from
            `audio_prompt`; any other value uses `text_prompt`.
        audio_prompt: Reference audio handed to `get_audio_prompt`
            (the UI supplies a file path).
        text_prompt: Style description handed to `get_text_prompt`.
        seed: Seed for `torch.manual_seed`; ignored when `randomize_seed`
            is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        steps: Forwarded to `inference` as ``sample_steps``.
        cfg_strength: Forwarded to `inference` as ``cfg_strength``.
        file_type: Forwarded to `inference` as ``file_type``.
        odeint_method: Forwarded to `inference` as ``odeint_method``.
        device: Device on which the lyric tensor and style prompt are built.

    Returns:
        Whatever `inference` returns; the UI routes it to a `gr.Audio`
        output.

    Raises:
        gr.Error: If audio mode is selected without an audio prompt, or if
            lyric parsing / style-prompt extraction fails.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Sliders may deliver a float-typed number; torch.manual_seed needs an int.
    seed = int(seed)
    torch.manual_seed(seed)
    print(seed, current_prompt_type)

    # Fail early with a readable message instead of an opaque helper error.
    if current_prompt_type == "audio" and not audio_prompt:
        raise gr.Error(
            "Error: no audio prompt provided; upload one or use the Text Prompt tab."
        )

    try:
        lrc_prompt = parse_lyrics(lrc_tokenizer, lrc)
        # sum(..., []) concatenates the parsed pieces into one flat token list.
        lrc_prompt = torch.tensor(
            sum(lrc_prompt, []), dtype=torch.long, device=device
        )
        if current_prompt_type == "audio":
            style_prompt = get_audio_prompt(mulan, audio_prompt, device, dtype)
        else:
            style_prompt = get_text_prompt(mulan, text_prompt, device, dtype)
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Error: {str(e)}") from e

    style_prompt = style_prompt.to(dtype)

    # TODO: switch to `inference_stream` (same keyword arguments, yielding
    # blocks) once streaming audio output works reliably in Gradio.
    generate_song = inference(
        model=diffrhythm2,
        decoder=decoder,
        text=lrc_prompt,
        style_prompt=style_prompt,
        sample_steps=steps,
        cfg_strength=cfg_strength,
        odeint_method=odeint_method,
        duration=240,
        file_type=file_type,
    )
    return generate_song


# Stylesheet for the Blocks app: gives the lyrics textarea a fixed height
# with a scrollbar and styles the examples container.
# NOTE(review): whitespace inside this string was reconstructed; the rules
# themselves are unchanged.
css = """
/* 固定文本域高度并强制滚动条 */
.lyrics-scroll-box textarea {
    height: 405px !important;  /* 固定高度 */
    max-height: 500px !important;  /* 最大高度 */
    overflow-y: auto !important;  /* 垂直滚动 */
    white-space: pre-wrap;  /* 保留换行 */
    line-height: 1.5;  /* 行高优化 */
}

.gr-examples {
    background: transparent !important;
    border: 1px solid #e0e0e0 !important;
    border-radius: 8px;
    margin: 1rem 0 !important;
    padding: 1rem !important;
}
"""

# Lyrics pre-filled in the textbox.
# NOTE(review): line breaks reconstructed (one tag or lyric line per row, as
# in the guide's example) — confirm against the intended default.
DEFAULT_LYRICS = """[start]
[intro]
[verse]
Thought I heard your voice yesterday
When I turned around to say
That I loved you baby
I realize it was juss my mind
Played tricks on me
And it seems colder lately at night
And I try to sleep with the lights on
Every time the phone rings
I pray to God it's you
And I just can't believe
That we're through
[chorus]
I miss you
There's no other way to say it
And I can't deny it
I miss you
It's so easy to see
I miss you and me
[verse]
Is it turning over this time
Have we really changed our minds about each other's love
All the feelings that we used to share
I refuse to believe
That you don't care
[chorus]
I miss you
There's no other way to say it
And I and I can't deny it
I miss you
[verse]
It's so easy to see
I've got to gather myself as together
I've been through worst kinds of weather
If it's over now
[outro]"""

# Markdown shown in the "Best Practices Guide" accordion.
# NOTE(review): line breaks and list indentation reconstructed; wording is
# unchanged.
BEST_PRACTICES_MD = """
1. **Lyrics Format Requirements**
    - Each line must follow: `Lyric content`
    - Example of valid format:
        ```
        [intro]
        [verse]
        Thought I heard your voice yesterday
        When I turned around to say
        ```

2. **Audio Prompt Requirements**
    - Reference audio should be ≥ 1 second, Audio >10 seconds will be randomly clipped into 10 seconds
    - For optimal results, the 10-second clips should be carefully selected
    - Shorter clips may lead to incoherent generation

3. **Supported Languages**
    - Chinese and English

**Due to issues with Gradio's streaming audio output, we will update the streaming feature in the future. Please stay tuned!**
"""


def image_to_base64(path):
    """Return the contents of the file at `path` as a base64 text string."""
    with open(path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def update_prompt_type(evt: gr.SelectData):
    """Map the selected prompt tab to a prompt type.

    The first tab (index 0) is the text prompt; any other tab is audio.
    The `gr.SelectData` annotation is what makes Gradio pass the event.
    """
    return "text" if evt.index == 0 else "audio"


with gr.Blocks(css=css) as demo:
    # NOTE(review): the header renders only a newline here — confirm whether
    # title/logo markup is missing from this revision.
    gr.HTML("\n")

    with gr.Tabs() as tabs:
        # page 1
        with gr.Tab("Music Generate", id=0):
            with gr.Row():
                # Left column: lyrics and style prompt.
                with gr.Column():
                    lrc = gr.Textbox(
                        label="Lyrics",
                        placeholder="Input the full lyrics",
                        lines=12,
                        max_lines=50,
                        elem_classes="lyrics-scroll-box",
                        value=DEFAULT_LYRICS,
                    )

                    # Tracks which prompt tab is active; read by infer_music.
                    current_prompt_type = gr.State(value="text")
                    with gr.Tabs() as inside_tabs:
                        with gr.Tab("Text Prompt"):
                            text_prompt = gr.Textbox(
                                label="Text Prompt",
                                value="Pop, Piano, Bass, Drums, Happy",
                                placeholder="Enter the Text Prompt, eg: emotional piano pop",
                            )
                        with gr.Tab("Audio Prompt"):
                            audio_prompt = gr.Audio(
                                label="Audio Prompt", type="filepath"
                            )

                    inside_tabs.select(
                        fn=update_prompt_type,
                        outputs=current_prompt_type,
                    )

                # Right column: guide, trigger, result and advanced settings.
                with gr.Column():
                    with gr.Accordion("Best Practices Guide", open=True):
                        gr.Markdown(BEST_PRACTICES_MD)

                    lyrics_btn = gr.Button("Generate", variant="primary")
                    audio_output = gr.Audio(
                        label="Audio Result", elem_id="audio_output"
                    )

                    with gr.Accordion("Advanced Settings", open=False):
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=0,
                        )
                        randomize_seed = gr.Checkbox(
                            label="Randomize seed", value=True
                        )
                        steps = gr.Slider(
                            minimum=10,
                            maximum=100,
                            value=16,
                            step=1,
                            label="Diffusion Steps",
                            interactive=True,
                            elem_id="step_slider",
                        )
                        # step=0.1 keeps the 1.3 default on the slider grid;
                        # the id is distinct from the steps slider because
                        # element ids must be unique in the page.
                        cfg_strength = gr.Slider(
                            minimum=1,
                            maximum=10,
                            value=1.3,
                            step=0.1,
                            label="CFG Strength",
                            interactive=True,
                            elem_id="cfg_slider",
                        )
                        odeint_method = gr.Radio(
                            ["euler", "midpoint", "rk4", "implicit_adams"],
                            label="ODE Solver",
                            value="euler",
                        )
                        file_type = gr.Dropdown(
                            ["wav", "mp3", "ogg"],
                            label="Output Format",
                            value="mp3",
                        )

            # TODO: re-add gr.Examples for audio prompts, text prompts and
            # lyrics once example assets are available.

    # No-op listener; it is registered with no inputs, so the callback must
    # accept no arguments.
    tabs.select(lambda: None, None, None)

    # TODO add max_frames parameter for infer_music
    lyrics_btn.click(
        fn=infer_music,
        inputs=[
            lrc,
            current_prompt_type,
            audio_prompt,
            text_prompt,
            seed,
            randomize_seed,
            steps,
            cfg_strength,
            file_type,
            odeint_method,
        ],
        outputs=audio_output,
    )


if __name__ == "__main__":
    demo.launch()