Spaces:
Running
on
Zero
Running
on
Zero
| import gradio as gr | |
| import torch | |
| import json | |
| import random | |
| import numpy as np | |
| import base64 | |
| import spaces | |
| from diffrhythm2.utils import ( | |
| prepare_model, | |
| parse_lyrics, | |
| get_audio_prompt, | |
| get_text_prompt, | |
| inference, | |
| inference_stream | |
| ) | |
# Upper bound used for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max

# Device and precision handed to prepare_model and to the prompt helpers.
device = 'cuda'
dtype = torch.float16

# Placeholder binding; it is rebound by the prepare_model call right below.
lrc_tokenizer = None

# Load everything once at import time so each request reuses the same objects.
diffrhythm2, mulan, lrc_tokenizer, decoder = prepare_model(
    "ASLP-Lab/DiffRhythm2",
    device,
    dtype,
)
# NOTE(review): `spaces` is imported by this file but no `@spaces.GPU`
# decorator is visible on this function; on ZeroGPU hardware GPU work is
# expected to run inside a decorated function — confirm it was not dropped.
def infer_music(
    lrc,
    current_prompt_type,
    audio_prompt=None,
    text_prompt=None,
    seed=42,
    randomize_seed=False,
    steps=16,
    cfg_strength=1.0,
    file_type='wav',
    odeint_method='euler',
    device='cuda'
):
    """Generate a song from lyrics plus a style prompt.

    Args:
        lrc: Lyrics text, passed to ``parse_lyrics`` together with the
            module-level ``lrc_tokenizer``.
        current_prompt_type: ``"audio"`` selects ``audio_prompt`` as the style
            source; any other value selects ``text_prompt``.
        audio_prompt: Reference audio handed to ``get_audio_prompt``.
        text_prompt: Style description handed to ``get_text_prompt``.
        seed: Seed given to ``torch.manual_seed``; coerced to ``int`` first.
        randomize_seed: When true, ``seed`` is replaced by a random integer
            in ``[0, MAX_SEED]``.
        steps: Forwarded to ``inference`` as ``sample_steps``.
        cfg_strength: Forwarded to ``inference`` unchanged.
        file_type: Forwarded to ``inference`` unchanged.
        odeint_method: Forwarded to ``inference`` unchanged.
        device: Device on which the lyric token tensor is created and which
            is passed to the prompt helpers.

    Returns:
        Whatever ``inference`` returns (wired to a ``gr.Audio`` output by the
        UI in this file).

    Raises:
        gr.Error: If lyric parsing or style-prompt extraction fails; the
            original exception is attached as ``__cause__``.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # torch.manual_seed only accepts integers; sliders / API payloads may
    # deliver the value as a float such as 42.0.
    seed = int(seed)
    torch.manual_seed(seed)
    print(seed, current_prompt_type)

    try:
        token_lines = parse_lyrics(lrc_tokenizer, lrc)
        # Flatten one level in linear time (the previous sum(..., []) was
        # quadratic). Assumes parse_lyrics yields one token list per line —
        # TODO confirm against diffrhythm2.utils.
        flat_tokens = [token for line in token_lines for token in line]
        lrc_prompt = torch.tensor(flat_tokens, dtype=torch.long, device=device)

        if current_prompt_type == "audio":
            style_prompt = get_audio_prompt(mulan, audio_prompt, device, dtype)
        else:
            style_prompt = get_text_prompt(mulan, text_prompt, device, dtype)
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the traceback chain.
        raise gr.Error(f"Error: {str(e)}") from e

    # Uses the module-level dtype so the prompt matches the model precision.
    style_prompt = style_prompt.to(dtype)

    generate_song = inference(
        model=diffrhythm2,
        decoder=decoder,
        text=lrc_prompt,
        style_prompt=style_prompt,
        sample_steps=steps,
        cfg_strength=cfg_strength,
        odeint_method=odeint_method,
        duration=240,
        file_type=file_type
    )
    return generate_song

    # Streaming variant, currently disabled (kept for reference):
    # for block in inference_stream(
    #     model=diffrhythm2,
    #     decoder=decoder,
    #     text=lrc_prompt,
    #     style_prompt=style_prompt,
    #     sample_steps=steps,
    #     cfg_strength=cfg_strength,
    #     odeint_method=odeint_method,
    #     duration=240,
    #     file_type=file_type
    # ):
    #     yield block
# Custom stylesheet passed to gr.Blocks(css=...).
# - `.lyrics-scroll-box textarea`: fixed-height, vertically scrolling lyrics box
#   (the class is attached to the lyrics Textbox via elem_classes).
# - `.gr-examples`: bordered, transparent container styling.
css = """
/* 固定文本域高度并强制滚动条 */
.lyrics-scroll-box textarea {
    height: 405px !important; /* 固定高度 */
    max-height: 500px !important; /* 最大高度 */
    overflow-y: auto !important; /* 垂直滚动 */
    white-space: pre-wrap; /* 保留换行 */
    line-height: 1.5; /* 行高优化 */
}
.gr-examples {
    background: transparent !important;
    border: 1px solid #e0e0e0 !important;
    border-radius: 8px;
    margin: 1rem 0 !important;
    padding: 1rem !important;
}
"""
| import base64 | |
def image_to_base64(path):
    """Read the file at ``path`` in binary mode and return its Base64 text.

    Args:
        path: Location of the file to read.

    Returns:
        The Base64 encoding of the file's bytes as a ``str``.
    """
    with open(path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
# Default content of the lyrics box. Kept flush-left so that no leading
# whitespace ends up in the textbox value.
_DEFAULT_LYRICS = """[start]
[intro]
[verse]
Thought I heard your voice yesterday
When I turned around to say
That I loved you baby
I realize it was juss my mind
Played tricks on me
And it seems colder lately at night
And I try to sleep with the lights on
Every time the phone rings
I pray to God it's you
And I just can't believe
That we're through
[chorus]
I miss you
There's no other way to say it
And I can't deny it
I miss you
It's so easy to see
I miss you and me
[verse]
Is it turning over this time
Have we really changed our minds about each other's love
All the feelings that we used to share
I refuse to believe
That you don't care
[chorus]
I miss you
There's no other way to say it
And I and I can't deny it
I miss you
[verse]
It's so easy to see
I've got to gather myself as together
I've been through worst kinds of weather
If it's over now
[outro]"""

# Help text rendered in the "Best Practices Guide" accordion.
# NOTE(review): any list nesting / indentation inside this Markdown literal
# could not be recovered from the reviewed copy — confirm the rendered output.
_BEST_PRACTICES_MD = """
1. **Lyrics Format Requirements**
- Each line must follow: `Lyric content`
- Example of valid format:
```
[intro]
[verse]
Thought I heard your voice yesterday
When I turned around to say
```
2. **Audio Prompt Requirements**
- Reference audio should be ≥ 1 second, Audio >10 seconds will be randomly clipped into 10 seconds
- For optimal results, the 10-second clips should be carefully selected
- Shorter clips may lead to incoherent generation
3. **Supported Languages**
- Chinese and English
**Due to issues with Gradio's streaming audio output, we will update the streaming feature in the future. Please stay tuned!**
"""

# Build the Gradio UI: a header, one "Music Generate" tab with the lyrics and
# style-prompt inputs on the left and guide / output / settings on the right.
with gr.Blocks(css=css) as demo:
    # Title plus paper / repository / project-page badges.
    gr.HTML("""
    <div style="flex: 1; text-align: center;">
        <div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
            Di♪♪Rhythm 2 (谛韵)
        </div>
        <div style="display:flex; justify-content: center; column-gap:4px;">
            <a href="https://arxiv.org/pdf/2510.22950">
                <img src='https://img.shields.io/badge/Arxiv-Paper-blue'>
            </a>
            <a href="https://github.com/ASLP-lab/DiffRhythm2">
                <img src='https://img.shields.io/badge/GitHub-Repo-green'>
            </a>
            <a href="https://aslp-lab.github.io/DiffRhythm2.github.io/">
                <img src='https://img.shields.io/badge/Project-Page-brown'>
            </a>
        </div>
    </div>
    """)

    with gr.Tabs() as tabs:
        # page 1
        with gr.Tab("Music Generate", id=0):
            with gr.Row():
                with gr.Column():
                    lrc = gr.Textbox(
                        label="Lyrics",
                        placeholder="Input the full lyrics",
                        lines=12,
                        max_lines=50,
                        elem_classes="lyrics-scroll-box",
                        value=_DEFAULT_LYRICS,
                    )

                    # Tracks which style-prompt tab is active; "text" matches
                    # the tab that is declared first below.
                    current_prompt_type = gr.State(value="text")
                    with gr.Tabs() as inside_tabs:
                        with gr.Tab("Text Prompt"):
                            text_prompt = gr.Textbox(
                                label="Text Prompt",
                                value="Pop, Piano, Bass, Drums, Happy",
                                placeholder="Enter the Text Prompt, eg: emotional piano pop",
                            )
                        with gr.Tab("Audio Prompt"):
                            audio_prompt = gr.Audio(label="Audio Prompt", type="filepath")

                    def update_prompt_type(evt: gr.SelectData):
                        """Map the selected inner tab to a prompt type.

                        Index 0 is the "Text Prompt" tab (declared first);
                        every other index is reported as "audio".
                        """
                        return "text" if evt.index == 0 else "audio"

                    inside_tabs.select(
                        fn=update_prompt_type,
                        outputs=current_prompt_type
                    )

                with gr.Column():
                    with gr.Accordion("Best Practices Guide", open=True):
                        gr.Markdown(_BEST_PRACTICES_MD)

                    lyrics_btn = gr.Button("Generate", variant="primary")
                    # audio_output = gr.Gallery(label="Audio Results")
                    audio_output = gr.Audio(label="Audio Result", elem_id="audio_output")

                    with gr.Accordion("Advanced Settings", open=False):
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=0,
                        )
                        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                        steps = gr.Slider(
                            minimum=10,
                            maximum=100,
                            value=16,
                            step=1,
                            label="Diffusion Steps",
                            interactive=True,
                            elem_id="step_slider"
                        )
                        cfg_strength = gr.Slider(
                            minimum=1,
                            maximum=10,
                            value=1.3,
                            # 0.1 keeps the 1.3 default on the slider grid
                            # (it was unreachable with a 0.5 step from 1).
                            step=0.1,
                            label="CFG Strength",
                            interactive=True,
                            # Own id: this previously reused "step_slider",
                            # producing duplicate DOM ids.
                            elem_id="cfg_slider"
                        )
                        odeint_method = gr.Radio(
                            ["euler", "midpoint", "rk4", "implicit_adams"],
                            label="ODE Solver",
                            value="euler",
                        )
                        file_type = gr.Dropdown(
                            ["wav", "mp3", "ogg"],
                            label="Output Format",
                            value="mp3",
                        )

            # Example galleries below are currently disabled.
            # gr.Examples(
            #     examples=[
            #         ["src/prompt/classic_cn.wav"],
            #         ["src/prompt/classic_en.wav"],
            #         ["src/prompt/country_cn.wav"],
            #         ["src/prompt/country_en.wav"],
            #         ["src/prompt/jazz_cn.wav"],
            #         ["src/prompt/jazz_en.wav"],
            #         ["src/prompt/pop_cn.wav"],
            #         ["src/prompt/pop_en.wav"],
            #         ["src/prompt/rap_cn.wav"],
            #         ["src/prompt/rap_en.wav"],
            #         ["src/prompt/rock_cn.wav"],
            #         ["src/prompt/rock_en.wav"]
            #     ],
            #     inputs=[audio_prompt],
            #     label="Audio Examples",
            #     examples_per_page=12,
            #     elem_id="audio-examples-container"
            # )
            # gr.Examples(
            #     examples=[
            #         ["Pop Emotional Piano"],
            #         ["流行 情感 钢琴"],
            #         ["Indie folk ballad, coming-of-age themes, acoustic guitar picking with harmonica interludes"],
            #         ["独立民谣, 成长主题, 原声吉他弹奏与口琴间奏"]
            #     ],
            #     inputs=[text_prompt],
            #     label="Text Examples",
            #     examples_per_page=4,
            #     elem_id="text-examples-container"
            # )
            # gr.Examples(
            #     examples=[
            #         ["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
            #         ["""[00:05.00]Stardust whispers in your eyes\n[00:09.30]Moonlight paints our silhouettes\n[00:13.75]Tides bring secrets from the deep\n[00:18.20]Where forever's breath is kept\n[00:22.90]We dance through constellations' maze\n[00:27.15]Footprints melt in cosmic waves\n[00:31.65]Horizons hum our silent vow\n[00:36.10]Time unravels here and now\n[00:40.85]Eternal embers in the night oh oh oh\n[00:45.25]Healing scars with liquid light\n[00:49.70]Galaxies write our refrain\n[00:54.15]Love reborn in endless rain\n[01:15.30]Paper boats of memories\n[01:19.75]Float through veins of ancient trees\n[01:24.20]Your laughter spins aurora threads\n[01:28.65]Weaving dawn through featherbed"""],
            #         ["""[00:04.27]只因你太美 baby\n[00:08.95]只因你实在是太美 baby\n[00:13.99]只因你太美 baby\n[00:18.89]迎面走来的你让我如此蠢蠢欲动\n[00:20.88]这种感觉我从未有\n[00:21.79]Cause I got a crush on you who you\n[00:25.74]你是我的我是你的谁\n[00:28.09]再多一眼看一眼就会爆炸\n[00:30.31]再近一点靠近点快被融化\n[00:32.49]想要把你占为己有 baby\n[00:34.60]不管走到哪里\n[00:35.44]都会想起的人是你 you you\n[00:38.12]我应该拿你怎样\n[00:39.61]Uh 所有人都在看着你\n[00:42.36]我的心总是不安\n[00:44.18]Oh 我现在已病入膏肓\n[00:46.63]Eh oh\n[00:47.84]难道真的因你而疯狂吗\n[00:51.57]我本来不是这种人\n[00:53.59]因你变成奇怪的人\n[00:55.77]第一次呀变成这样的我\n[01:01.23]不管我怎么去否认\n[01:03.21]只因你太美 baby\n[01:11.46]只因你实在是太美 baby\n[01:16.75]只因你太美 baby\n[01:21.09]Oh eh oh\n[01:22.82]现在确认地告诉我\n[01:25.26]Oh eh oh\n[01:27.31]你到底属于谁\n[01:29.98]Oh eh oh\n[01:31.70]现在确认地告诉我\n[01:34.45]Oh eh oh\n[01:36.35]你到底属于谁\n[01:37.65]就是现在告诉我\n[01:40.00]跟着那节奏 缓缓 make wave\n"""],
            #         ["""[00:16.55]倦鸟西归 竹影余晖\n[00:23.58]禅意心扉\n[00:27.32]待清风 拂开一池春水\n[00:30.83]你的手绘 玉色难褪\n[00:37.99]我端详飘散的韵味\n[00:40.65]落款壶底的名讳\n[00:42.92]如吻西施的嘴\n[00:45.14]风雅几回 总相随\n[00:52.32]皆因你珍贵\n[00:57.85]三千弱水 煮一杯\n[01:02.21]我只饮下你的美\n[01:04.92]千年余味 紫砂壶伴我醉\n[01:09.73]酿一世无悔\n[01:12.09]沏壶春水 翠烟飞\n[01:16.62]把盏不尽你的香味\n[01:20.06]邀月相对 愿今生同宿同归\n[01:26.43]只让你陪\n[01:46.12]茗香芳菲 世俗无追\n"""]
            #     ],
            #     inputs=[lrc],
            #     label="Lrc Examples",
            #     examples_per_page=4,
            #     elem_id="lrc-examples-container",
            # )

    # No-op listener on the outer tabs. It takes no parameters because no
    # inputs are wired (the previous one-argument lambda did not match).
    tabs.select(
        lambda: None,
        None,
        None
    )

    # TODO add max_frames parameter for infer_music
    # The input order must match infer_music's positional parameters; its
    # trailing `device` parameter keeps its default.
    lyrics_btn.click(
        fn=infer_music,
        inputs=[
            lrc,
            current_prompt_type,
            audio_prompt,
            text_prompt,
            seed,
            randomize_seed,
            steps,
            cfg_strength,
            file_type,
            odeint_method,
        ],
        outputs=audio_output,
    )

# demo.queue().launch(show_api=False, show_error=True)
if __name__ == "__main__":
    demo.launch()