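"""Gradio demo app: generate music from text, a melody, or an image.

Three tabs wire prompts and generation configs into the shared predict()
helper from gradio_components.prediction.
"""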
import argparse
import ast
import os

import gradio as gr

# from gradio_components.image import generate_caption, improve_prompt
from gradio_components.image import generate_caption_gpt4
from gradio_components.model_cards import (
    MELODY_CONDITIONED_MODELS,
    MELODY_CONTINUATION_MODELS,
    TEXT_TO_MUSIC_MODELS,
    TEXT_TO_SOUND_MODELS,
)
from gradio_components.prediction import predict, transcribe
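
# Custom Glass theme (fuchsia/indigo palette with dark-mode overrides); passed
# to gr.Blocks(theme=theme) in UI() below so it takes effect.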
theme = gr.themes.Glass(
    primary_hue="fuchsia",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont("Source Sans Pro"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
).set(
    body_background_fill_dark="*background_fill_primary",
    embed_radius="*table_radius",
    background_fill_primary="*neutral_50",
    background_fill_primary_dark="*neutral_950",
    background_fill_secondary_dark="*neutral_900",
    border_color_accent="*neutral_600",
    border_color_accent_subdued="*color_accent",
    border_color_primary_dark="*neutral_700",
    block_background_fill="*background_fill_primary",
    block_background_fill_dark="*neutral_800",
    block_border_width="1px",
    block_label_background_fill="*background_fill_primary",
    block_label_background_fill_dark="*background_fill_secondary",
    block_label_text_color="*neutral_500",
    block_label_text_size="*text_sm",
    block_label_text_weight="400",
    block_shadow="none",
    block_shadow_dark="none",
    block_title_text_color="*neutral_500",
    block_title_text_weight="400",
    panel_border_width="0",
    panel_border_width_dark="0",
    checkbox_background_color_dark="*neutral_800",
    checkbox_border_width="*input_border_width",
    checkbox_label_border_width="*input_border_width",
    input_background_fill="*neutral_100",
    input_background_fill_dark="*neutral_700",
    input_border_color_focus_dark="*neutral_700",
    input_border_width="0px",
    input_border_width_dark="0px",
    slider_color="#2563eb",
    slider_color_dark="#2563eb",
    table_even_background_fill_dark="*neutral_950",
    table_odd_background_fill_dark="*neutral_900",
    button_border_width="*input_border_width",
    button_shadow_active="none",
    button_primary_background_fill="*primary_200",
    button_primary_background_fill_dark="*primary_700",
    button_primary_background_fill_hover="*button_primary_background_fill",
    button_primary_background_fill_hover_dark="*button_primary_background_fill",
    button_secondary_background_fill="*neutral_200",
    button_secondary_background_fill_dark="*neutral_600",
    button_secondary_background_fill_hover="*button_secondary_background_fill",
    button_secondary_background_fill_hover_dark="*button_secondary_background_fill",
    button_cancel_background_fill="*button_secondary_background_fill",
    button_cancel_background_fill_dark="*button_secondary_background_fill",
    button_cancel_background_fill_hover="*button_cancel_background_fill",
    button_cancel_background_fill_hover_dark="*button_cancel_background_fill",
)


def generate_prompt(prompt, style):
    """Append the selected genre tags to the free-text prompt."""
    # style may be None when no genre checkbox is selected
    return ",".join([prompt] + (style or []))
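
# Example: generate_prompt("a rainy night", ["Jazz", "Soul"])
# -> "a rainy night,Jazz,Soul"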


def UI(share=False):
    with gr.Blocks(theme=theme) as demo:
        with gr.Tab("Generate Music by Text"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        model_path = gr.Dropdown(
                            choices=TEXT_TO_MUSIC_MODELS,
                            label="Select the model",
                            value="facebook/musicgen-large",
                        )
                    with gr.Row():
                        text_prompt = gr.Textbox(
                            label="Let's make a song about ...",
                            value="First day learning music generation at Stanford University",
                            interactive=True,
                            visible=True,
                        )
                        num_outputs = gr.Number(
                            label="Number of outputs",
                            value=1,
                            minimum=1,
                            maximum=10,
                            interactive=True,
                        )
                    with gr.Row():
                        style = gr.CheckboxGroup(
                            [
                                "Jazz",
                                "Classical Music",
                                "Hip Hop",
                                "Ragga Jungle",
                                "Dark Jazz",
                                "Soul",
                                "Blues",
                                "80s Rock N Roll",
                            ],
                            value=None,
                            label="Music genre",
                            interactive=True,
                        )

                    def update_prompt(prompt, style):
                        # Intended as a style.change() callback; takes the current
                        # textbox value as an input instead of reading component state.
                        return generate_prompt(prompt, style)

                    config_output_textbox = gr.Textbox(label="Model Configs", visible=False)

                    def show_config_options(model_path):
                        with gr.Accordion("Model Generation Configs"):
                            if "magnet" in model_path:
                                with gr.Row():
                                    top_k = gr.Number(label="Top-k", value=300, interactive=True)
                                    top_p = gr.Number(label="Top-p", value=0, interactive=True)
                                    temperature = gr.Number(
                                        label="Temperature", value=1.0, interactive=True
                                    )
                                    span_arrangement = gr.Radio(
                                        ["nonoverlap", "stride1"],
                                        value="nonoverlap",
                                        label="Span arrangement",
                                        info="Use either non-overlapping spans ('nonoverlap') or overlapping spans ('stride1')",
                                    )

                                def return_model_configs(top_k, top_p, temperature, span_arrangement):
                                    return {
                                        "top_k": top_k,
                                        "top_p": top_p,
                                        "temperature": temperature,
                                        "span_arrangement": span_arrangement,
                                    }
                            else:
                                with gr.Row():
                                    duration = gr.Slider(
                                        minimum=10,
                                        maximum=30,
                                        value=30,
                                        label="Duration",
                                        interactive=True,
                                    )
                                    use_sampling = gr.Checkbox(label="Use Sampling", interactive=True, value=True)
                                    top_k = gr.Number(label="Top-k", value=300, interactive=True)
                                    top_p = gr.Number(label="Top-p", value=0, interactive=True)
                                    temperature = gr.Number(
                                        label="Temperature", value=1.0, interactive=True
                                    )

                                def return_model_configs(duration, use_sampling, top_k, top_p, temperature):
                                    return {
                                        "duration": duration,
                                        "use_sampling": use_sampling,
                                        "top_k": top_k,
                                        "top_p": top_p,
                                        "temperature": temperature,
                                    }

                with gr.Column():
                    with gr.Row():
                        melody = gr.Audio(
                            sources=["upload"],
                            type="numpy",
                            label="File",
                            interactive=True,
                            elem_id="melody-input",
                            visible=False,
                        )
                        submit = gr.Button("Generate Music")
                        result_text = gr.Textbox(label="Generated Music (text)", type="text", interactive=False)
                    output_audios = []

                    def show_output_audio(tmp_paths):
                        # tmp_paths arrives as the string repr of a list of file
                        # paths; parse it and build one audio player per clip.
                        if tmp_paths:
                            tmp_paths = ast.literal_eval(tmp_paths)
                            for i, tmp_path in enumerate(tmp_paths):
                                _audio = gr.Audio(
                                    value=tmp_path,
                                    label=f"Generated Music {i}",
                                    type="filepath",
                                    interactive=False,
                                    visible=True,
                                )
                                output_audios.append(_audio)

            submit.click(
                fn=predict,
                inputs=[model_path, config_output_textbox, text_prompt, melody, num_outputs],
                outputs=result_text,
                queue=True,
            )
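
            # NOTE: update_prompt, show_config_options, and show_output_audio are
            # defined but never attached to events, so they do nothing at runtime.
            # A minimal wiring sketch for the prompt helper (not in the original):
            #     style.change(fn=update_prompt, inputs=[text_prompt, style], outputs=text_prompt)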
        with gr.Tab("Generate Music by Melody"):
            with gr.Column():
                with gr.Row():
                    radio_melody_condition = gr.Radio(
                        ["Music Continuation", "Music Conditioning"],
                        value=None,
                        label="Select the condition",
                    )
                    model_path2 = gr.Dropdown(label="Model")

                def model_selection(radio_melody_condition):
                    if radio_melody_condition == "Music Continuation":
                        model_path2 = gr.Dropdown(
                            choices=MELODY_CONTINUATION_MODELS,
                            label="Select the model",
                            value="facebook/musicgen-large",
                            interactive=True,
                            visible=True,
                        )
                    elif radio_melody_condition == "Music Conditioning":
                        model_path2 = gr.Dropdown(
                            choices=MELODY_CONDITIONED_MODELS,
                            label="Select the model",
                            value="facebook/musicgen-melody-large",
                            interactive=True,
                            visible=True,
                        )
                    else:
                        model_path2 = gr.Dropdown(
                            choices=TEXT_TO_SOUND_MODELS,
                            label="Select the model",
                            value="facebook/musicgen-large",
                            interactive=True,
                            visible=False,
                        )
                    return model_path2
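
                # NOTE: model_selection is written as a .change() callback for
                # radio_melody_condition (returning the updated dropdown), but the
                # event is never registered here.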

                upload_melody = gr.Audio(sources=["upload", "microphone"], type="filepath", label="File")
                prompt_text2 = gr.Textbox(
                    label="Let's make a song about ...",
                    value=None,
                    interactive=True,
                    visible=True,
                )
                with gr.Row():
                    config_output_textbox2 = gr.Textbox(label="Model Configs", visible=True)
                with gr.Row():
                    duration2 = gr.Number(10, label="Duration", interactive=True)
                    num_outputs2 = gr.Number(1, label="Number of outputs", interactive=True)

                def return_model_configs2(duration):
                    return {
                        "duration": duration,
                        "use_sampling": True,
                        "top_k": 300,
                        "top_p": 0,
                        "temperature": 1,
                    }

                submit2 = gr.Button("Generate Music")
                result_text2 = gr.Textbox(
                    label="Generated Music (melody)", type="text", interactive=False, visible=True
                )
                submit2.click(
                    fn=predict,
                    inputs=[model_path2, config_output_textbox2, prompt_text2, upload_melody, num_outputs2],
                    outputs=result_text2,
                    queue=True,
                )

                def show_output_audio(tmp_paths):
                    # Same helper as in the text tab (re-defined per tab).
                    if tmp_paths:
                        tmp_paths = ast.literal_eval(tmp_paths)
                        for i, tmp_path in enumerate(tmp_paths):
                            _audio = gr.Audio(
                                value=tmp_path,
                                label=f"Generated Music {i}",
                                type="filepath",
                                interactive=False,
                            )
                            output_audios.append(_audio)

                gr.Examples(
                    examples=[
                        [
                            os.path.join(os.path.dirname(__file__), "./data/audio/Suri's Improv.mp3"),
                            30,
                            "facebook/musicgen-large",
                            "Music Continuation",
                        ],
                        [
                            os.path.join(os.path.dirname(__file__), "./data/audio/lie_no_tomorrow_20sec.wav"),
                            40,
                            "facebook/musicgen-melody-large",
                            "Music Conditioning",
                        ],
                    ],
                    inputs=[upload_melody, duration2, model_path2, radio_melody_condition],
                )
        with gr.Tab("Generate Music by Image"):
            with gr.Column():
                with gr.Row():
                    image_input = gr.Image(label="Upload an image", type="filepath")
                    with gr.Accordion("Image Captioning", open=False):
                        image_description = gr.Textbox(label="image description", visible=True, interactive=False)
                        image_caption = gr.Textbox(label="generated text prompt", visible=True, interactive=True)

                    def generate_image_text_prompt(image_input):
                        if image_input:
                            image_description, image_caption = generate_caption_gpt4(image_input, model_path)
                            # message_object, description, prompt = generate_caption_claude3(image_input, model_path)
                            return image_description, image_caption
                        return "", ""

                with gr.Row():
                    melody3 = gr.Audio(sources=["upload", "microphone"], type="filepath", label="File", visible=True)
                with gr.Column():
                    model_path3 = gr.Dropdown(
                        choices=TEXT_TO_SOUND_MODELS + TEXT_TO_MUSIC_MODELS + MELODY_CONDITIONED_MODELS,
                        label="Select the model",
                        value="facebook/musicgen-large",
                    )
                duration3 = gr.Number(30, visible=False, label="Duration")
                submit3 = gr.Button("Generate Music")
                result_text3 = gr.Textbox(label="Generated Music (image)", type="text", interactive=False, visible=True)

                def predict_image_music(model_path3, image_caption, duration3, melody3):
                    model_configs = {
                        "duration": duration3,
                        "use_sampling": True,
                        "top_k": 250,
                        "top_p": 0,
                        "temperature": 1,
                    }
                    return predict(
                        model_version=model_path3,
                        generation_configs=model_configs,
                        prompt_text=image_caption,
                        prompt_wav=melody3,
                    )
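
                # Unlike the other tabs, predict is called with keyword arguments and
                # a config dict here (elsewhere the configs arrive as a Textbox string),
                # so predict presumably accepts both forms.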

                submit3.click(
                    fn=predict_image_music,
                    inputs=[model_path3, image_caption, duration3, melody3],
                    outputs=result_text3,
                    queue=True,
                )

                def show_output_audio(tmp_paths):
                    # Same per-tab helper as above.
                    if tmp_paths:
                        tmp_paths = ast.literal_eval(tmp_paths)
                        for i, tmp_path in enumerate(tmp_paths):
                            _audio = gr.Audio(
                                value=tmp_path,
                                label=f"Generated Music {i}",
                                type="filepath",
                                interactive=False,
                            )
                            output_audios.append(_audio)

                def show_transcribe_audio(tmp_paths):
                    return transcribe(tmp_paths)
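
                # NOTE: show_transcribe_audio and this tab's show_output_audio are
                # likewise never attached to events, so transcription never runs
                # from the UI as written.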

                gr.Examples(
                    examples=[
                        [
                            os.path.join(os.path.dirname(__file__), "./data/image/beach.jpeg"),
                            "facebook/musicgen-large",
                            30,
                            None,
                        ],
                        [
                            os.path.join(os.path.dirname(__file__), "./data/image/beach.jpeg"),
                            "facebook/audiogen-medium",
                            15,
                            None,
                        ],
                        [
                            os.path.join(os.path.dirname(__file__), "./data/image/beach.jpeg"),
                            "facebook/musicgen-melody-large",
                            30,
                            os.path.join(os.path.dirname(__file__), "./data/audio/Suri's Improv.mp3"),
                        ],
                        [
                            os.path.join(os.path.dirname(__file__), "./data/image/cat.jpeg"),
                            "facebook/musicgen-large",
                            30,
                            None,
                        ],
                    ],
                    inputs=[image_input, model_path3, duration3, melody3],
                )
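
    # queue() enables request queuing so long-running generations are processed
    # in order instead of timing out.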
    demo.queue().launch(share=share)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true", help="Create a public Gradio share link.")
    args = parser.parse_args()
    UI(share=args.share)
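
# Usage (assuming this file is saved as app.py):
#   python app.py          # run locally
#   python app.py --share  # also create a public Gradio share link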