Spaces:

Bils
/

AIPromoStudio

Sleeping

App Files Files Community

AIPromoStudio / app.py

Bils

Update app.py

a38649c verified 11 months ago

raw

history blame

8.52 kB

	import gradio as gr
	import os
	import torch
	from transformers import (
	AutoTokenizer,
	AutoModelForCausalLM,
	pipeline,
	AutoProcessor,
	MusicgenForConditionalGeneration,
	)
	from scipy.io.wavfile import write
	from pydub import AudioSegment
	from dotenv import load_dotenv
	import tempfile
	import spaces
	from TTS.api import TTS
	from TTS.utils.synthesizer import Synthesizer

	# Run python-dotenv's loader first, then read the HF_TOKEN variable from the
	# process environment; hf_token is None when the variable is not set.
	load_dotenv()
	hf_token = os.environ.get("HF_TOKEN")

	# ---------------------------------------------------------------------
	# Script Generation Function
	# ---------------------------------------------------------------------
	def _extract_script_sections(generated_text: str):
	    """Split model output into (voice script, sound design, music) strings.

	    Each section is located by the literal prefix the system prompt asks the
	    model to emit. A section whose prefix is absent is replaced by a fixed
	    "not found" message, so the result always has three strings.
	    """
	    voice_tag = "Voice-Over Script:"
	    sound_tag = "Sound Design Suggestions:"
	    music_tag = "Music Suggestions:"

	    if voice_tag in generated_text:
	        voice_script = generated_text.split(voice_tag)[1].split(sound_tag)[0].strip()
	    else:
	        voice_script = "No voice-over script found."

	    if sound_tag in generated_text:
	        sound_design = generated_text.split(sound_tag)[1].split(music_tag)[0].strip()
	    else:
	        sound_design = "No sound design suggestions found."

	    if music_tag in generated_text:
	        music_suggestions = generated_text.split(music_tag)[1].strip()
	    else:
	        music_suggestions = "No music suggestions found."

	    return voice_script, sound_design, music_suggestions


	@spaces.GPU(duration=100)
	def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
	    """Generate a promo script plus sound-design and music suggestions.

	    Args:
	        user_prompt: The promo concept typed by the user.
	        model_id: Identifier of the causal LM to load with ``from_pretrained``.
	        token: Value forwarded as ``use_auth_token`` when loading the model.
	        duration: Promo length in seconds; only interpolated into the prompt.

	    Returns:
	        A 3-tuple ``(voice_script, sound_design, music_suggestions)``. On any
	        failure the first element carries an "Error generating script: ..."
	        message and the other two are empty strings.
	    """
	    # Reject blank concepts before paying for a model load.
	    if not user_prompt or not user_prompt.strip():
	        return "Error generating script: the promo idea is empty.", "", ""

	    try:
	        # NOTE: tokenizer and model are loaded on every call.
	        # NOTE(review): recent transformers releases deprecate `use_auth_token`
	        # in favour of `token` - confirm the pinned version before switching.
	        tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
	        # SECURITY: model_id comes from the caller (a user-editable textbox in
	        # this app). trust_remote_code=True allows the referenced repository
	        # to execute its own Python code here - review before exposing publicly.
	        model = AutoModelForCausalLM.from_pretrained(
	            model_id,
	            use_auth_token=token,
	            torch_dtype=torch.float16,
	            device_map="auto",
	            trust_remote_code=True,
	        )
	        llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

	        # System prompt with clear structure instructions; the section prefixes
	        # named here are the ones _extract_script_sections searches for.
	        system_prompt = (
	            "You are an expert radio imaging producer specializing in sound design and music. "
	            f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
	            "1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
	            "2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
	            "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
	        )

	        combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
	        result = llama_pipeline(combined_prompt, max_new_tokens=300, do_sample=True, temperature=0.8)

	        full_text = result[0]["generated_text"]
	        # Drop the echoed prompt. Cutting by prefix (rather than taking the text
	        # after the LAST "Output:") keeps the whole completion even when the
	        # model writes "Output:" itself.
	        if full_text.startswith(combined_prompt):
	            generated_text = full_text[len(combined_prompt):]
	        else:
	            # Fallback: keep everything after the first marker.
	            generated_text = full_text.split("Output:", 1)[-1]

	        return _extract_script_sections(generated_text.strip())
	    except Exception as e:
	        # Best-effort UI contract: report the failure in the first output slot.
	        return f"Error generating script: {e}", "", ""

	# ---------------------------------------------------------------------
	# Voice-Over Generation Function (Inactive)
	# ---------------------------------------------------------------------
	@spaces.GPU(duration=100)
	def generate_voice(script: str, speaker: str = "default"):
	    """Placeholder for voice-over synthesis; the feature is inactive.

	    Args:
	        script: Voice-over text (currently ignored).
	        speaker: Speaker identifier (currently ignored).

	    Returns:
	        A fixed notice string saying the feature is inactive.
	    """
	    # Nothing here can raise, so the former try/except wrapper was dead code.
	    return "Voice-over generation is currently inactive."

	# ---------------------------------------------------------------------
	# Music Generation Function (facebook/musicgen-medium)
	# ---------------------------------------------------------------------
	@spaces.GPU(duration=100)
	def generate_music(prompt: str, audio_length: int):
	    """Generate a music clip from a text prompt with facebook/musicgen-medium.

	    Args:
	        prompt: Text description passed to the MusicGen processor.
	        audio_length: Number of new tokens to generate (``max_new_tokens``).

	    Returns:
	        Path of the written 16-bit WAV file, or an
	        "Error generating music: ..." string if anything fails.
	        NOTE(review): the app routes this return value into a filepath-typed
	        audio output, so the error string may not display as a readable
	        message there - confirm and consider surfacing it differently.
	    """
	    try:
	        # NOTE: model and processor are loaded on every call.
	        musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-medium")
	        musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-medium")

	        # Move the model to the appropriate device (CUDA or CPU)
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        musicgen_model.to(device)

	        # Prepare inputs
	        inputs = musicgen_processor(text=[prompt], padding=True, return_tensors="pt").to(device)

	        # Generate music; cast because UI sliders may deliver a float.
	        outputs = musicgen_model.generate(**inputs, max_new_tokens=int(audio_length))

	        # Presumably (batch, channels, samples) per the MusicGen docs - take
	        # the first item / first channel, as the original code did.
	        audio_data = outputs[0, 0].cpu().numpy()

	        # Peak-normalise to the int16 range. Use the array's own max (not the
	        # Python builtin) and skip the division for empty or silent output,
	        # which would otherwise divide by zero.
	        peak = float(abs(audio_data).max()) if audio_data.size else 0.0
	        if peak > 0.0:
	            audio_data = audio_data / peak
	        normalized_audio = (audio_data * 32767).astype("int16")

	        # Write at the rate the model's audio codec actually produces instead
	        # of a hard-coded 44100 Hz, which plays back at the wrong speed/pitch.
	        sampling_rate = musicgen_model.config.audio_encoder.sampling_rate

	        # Unique file per call so concurrent requests do not overwrite each
	        # other. delete=False leaves the file on disk for the caller to serve.
	        with tempfile.NamedTemporaryFile(
	            prefix="musicgen_medium_", suffix=".wav", delete=False
	        ) as tmp_file:
	            output_path = tmp_file.name
	        write(output_path, sampling_rate, normalized_audio)

	        return output_path

	    except Exception as e:
	        return f"Error generating music: {e}"


	# ---------------------------------------------------------------------
	# Audio Blending Function with Ducking (Inactive)
	# ---------------------------------------------------------------------
	def blend_audio(voice_path: str, music_path: str, ducking: bool):
	    """Placeholder for voice/music blending; the feature is inactive.

	    Args:
	        voice_path: Path to the voice-over audio (currently ignored).
	        music_path: Path to the music audio (currently ignored).
	        ducking: Whether ducking was requested (currently ignored).

	    Returns:
	        A fixed notice string saying the feature is inactive.
	    """
	    # Nothing here can raise, so the former try/except wrapper was dead code.
	    return "Audio blending functionality is currently inactive."


	# ---------------------------------------------------------------------
	# Gradio Interface
	# ---------------------------------------------------------------------
	# Static text shown in the UI, kept apart from the layout code below.
	_INTRO_MD = """
	    # 🎧 AI Promo Studio 🚀
	    Welcome to AI Promo Studio, your one-stop solution for creating stunning and professional radio promos with ease!
	    Whether you're a sound designer, radio producer, or content creator, our AI-driven tools, powered by advanced LLM Llama models, empower you to bring your vision to life in just a few steps.
	    """

	_VOICE_INACTIVE_MD = """
	    Note: Voice-over generation is currently inactive.
	    This feature will be available in future updates!
	    """

	_BLEND_INACTIVE_MD = """
	    Note: Audio blending functionality is currently inactive.
	    This feature will be available in future updates!
	    """

	_FOOTER_MD = """
	    <hr>
	    <p style="text-align: center; font-size: 0.9em;">
	    Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
	    </p>
	    """

	_VISITOR_BADGE_HTML = """
	    <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
	    <img src="https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold&countColor=%23263759" />
	    </a>
	    """

	# Four-tab workflow: script -> voice (inactive) -> music -> blend (inactive).
	with gr.Blocks() as demo:
	    gr.Markdown(_INTRO_MD)

	    with gr.Tabs():
	        # Step 1: Generate Script
	        with gr.Tab("Step 1: Generate Script"):
	            with gr.Row():
	                user_prompt = gr.Textbox(
	                    label="Promo Idea",
	                    placeholder="E.g., A 30-second promo for a morning show.",
	                )
	                llama_model_id = gr.Textbox(
	                    label="Llama Model ID",
	                    value="meta-llama/Meta-Llama-3-8B-Instruct",
	                )
	                duration = gr.Slider(
	                    label="Duration (seconds)",
	                    minimum=15,
	                    maximum=60,
	                    step=15,
	                    value=30,
	                )

	            generate_script_button = gr.Button("Generate Script")
	            script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5)
	            sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3)
	            music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3)

	            # The wrapper supplies the module-level hf_token, which is not a
	            # UI input.
	            generate_script_button.click(
	                fn=lambda user_prompt, model_id, duration: generate_script(user_prompt, model_id, hf_token, duration),
	                inputs=[user_prompt, llama_model_id, duration],
	                outputs=[script_output, sound_design_output, music_suggestion_output],
	            )

	        # Step 2: Generate Voice
	        with gr.Tab("Step 2: Generate Voice"):
	            gr.Markdown(_VOICE_INACTIVE_MD)

	        # Step 3: Generate Music
	        with gr.Tab("Step 3: Generate Music"):
	            with gr.Row():
	                audio_length = gr.Slider(
	                    label="Music Length (tokens)",
	                    minimum=128,
	                    maximum=1024,
	                    step=64,
	                    value=512,
	                )

	            generate_music_button = gr.Button("Generate Music")
	            music_output = gr.Audio(label="Generated Music", type="filepath")

	            # The music prompt is whatever the "Music Suggestions" box from
	            # step 1 currently holds.
	            generate_music_button.click(
	                fn=lambda music_suggestion, audio_length: generate_music(music_suggestion, audio_length),
	                inputs=[music_suggestion_output, audio_length],
	                outputs=[music_output],
	            )

	        # Step 4: Blend Audio
	        with gr.Tab("Step 4: Blend Audio"):
	            gr.Markdown(_BLEND_INACTIVE_MD)

	    gr.Markdown(_FOOTER_MD)

	    # Add visitor badge HTML
	    gr.HTML(_VISITOR_BADGE_HTML)

	demo.launch(debug=True)