Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
009c9f3
1
Parent(s):
50a3943
Add API endpoint and fix deprecation warnings
Browse files- Fixed torchaudio deprecation warning by removing torio import
- Added API endpoint for external calls at /generate
- Created example code for calling from other spaces
- Added VibeVoice integration helper class
- Made music generation accessible via Gradio Client API
- api_usage_example.py +170 -0
- app.py +72 -0
- pipeline_ace_step.py +3 -2
- vibevoice_integration.py +250 -0
api_usage_example.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Example code for calling ACE-Music-Generator from another Hugging Face Space
|
| 3 |
+
|
| 4 |
+
This shows how to use the ACE-Music-Generator API from your podcast space
|
| 5 |
+
or any other Python application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from gradio_client import Client
|
| 9 |
+
import tempfile
|
| 10 |
+
import requests
|
| 11 |
+
|
| 12 |
+
# Method 1: Using Gradio Client (Recommended for Spaces)
|
| 13 |
+
def generate_music_from_space(
    duration=20,
    tags="edm, synth, bass, 128 bpm, energetic",
    lyrics="[instrumental]",
    space_name="ACloudCenter/ACE-Music-Generator"
):
    """Generate a music clip via the ACE-Music-Generator space API.

    Args:
        duration: Length of the clip in seconds.
        tags: Comma-separated style/genre tags describing the music.
        lyrics: Lyric text, or "[instrumental]" for no vocals.
        space_name: Hugging Face space identifier to connect to.

    Returns:
        Path to the downloaded audio file, or None if generation failed.
    """
    try:
        space_client = Client(space_name)
        # The space exposes its generator under the "/generate" endpoint;
        # positional args mirror the Interface inputs defined in app.py.
        generated_path = space_client.predict(
            duration,
            tags,
            lyrics,
            60,    # infer_steps
            15.0,  # guidance_scale
            api_name="/generate"
        )
        return generated_path
    except Exception as e:
        print(f"Error generating music: {e}")
        return None
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Method 2: Direct HTTP API call
|
| 53 |
+
def generate_music_http(
    duration=20,
    tags="edm, synth, bass, 128 bpm, energetic",
    lyrics="[instrumental]",
    space_url="https://acloudcenter-ace-music-generator.hf.space"
):
    """Generate music using a direct HTTP call to the Gradio REST API.

    Args:
        duration: Duration in seconds
        tags: Music style tags
        lyrics: Lyrics or [instrumental]
        space_url: Base URL of the deployed space

    Returns:
        Path to the downloaded audio file, or None on failure.
        The caller is responsible for deleting the temp file.
    """
    api_url = f"{space_url}/run/generate"

    # Gradio's /run/<api_name> endpoint takes positional inputs as "data".
    payload = {
        "data": [
            duration,
            tags,
            lyrics,
            60,    # infer_steps
            15.0,  # guidance_scale
        ]
    }

    try:
        # Generation is slow; an explicit timeout prevents hanging forever
        # if the space is unresponsive (requests has no default timeout).
        response = requests.post(api_url, json=payload, timeout=300)

        if response.status_code == 200:
            result = response.json()
            # NOTE(review): assumes the first output is a file dict with a
            # "url" key -- verify against the space's actual response schema.
            audio_url = result["data"][0]["url"]

            # Save to a temp file so the caller gets a local path.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                audio_response = requests.get(audio_url, timeout=60)
                f.write(audio_response.content)
                return f.name
        else:
            print(f"Error: {response.status_code}")
            return None

    except Exception as e:
        print(f"Error generating music: {e}")
        return None
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Example usage in your podcast generator
|
| 109 |
+
def add_background_music_to_podcast():
    """Example: create a short background track for a podcast space."""
    # Ask the remote space for a calm 20-second instrumental bed.
    music_path = generate_music_from_space(
        duration=20,
        tags="edm, ambient, soft, background, 100 bpm, calm",
        lyrics="[instrumental]"
    )

    if not music_path:
        print("Failed to generate music")
        return None

    print(f"Generated music saved to: {music_path}")
    # The returned path can now be mixed with the podcast audio downstream.
    return music_path
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# Different music styles you can generate.
# Preset tag strings for common podcast use-cases; each value is passed
# straight through as the generator's `tags` parameter.
MUSIC_STYLES = {
    "podcast_intro": "upbeat, electronic, professional, 120 bpm, energetic, modern",
    "podcast_outro": "calm, ambient, soft, 80 bpm, relaxing, fade out",
    "news_background": "minimal, electronic, subtle, 90 bpm, serious, professional",
    "commercial": "pop, upbeat, catchy, 128 bpm, happy, commercial",
    "dramatic": "orchestral, dramatic, cinematic, 100 bpm, intense, emotional",
    "tech": "electronic, futuristic, synth, 110 bpm, innovative, modern",
    "chill": "lofi, relaxed, warm, 75 bpm, cozy, background",
}
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def generate_podcast_music(style="podcast_intro", duration=15):
    """Generate music for a podcast segment using a named preset.

    Args:
        style: Key into MUSIC_STYLES; unknown keys fall back to the
            "podcast_intro" preset.
        duration: Clip length in seconds.

    Returns:
        Path to the generated audio file, or None on failure.
    """
    preset_tags = MUSIC_STYLES.get(style, MUSIC_STYLES["podcast_intro"])
    return generate_music_from_space(
        duration=duration,
        tags=preset_tags,
        lyrics="[instrumental]"
    )
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
if __name__ == "__main__":
    # Smoke-test the remote API with a short intro clip (needs network).
    print("Generating test music...")
    result_path = generate_podcast_music(style="podcast_intro", duration=10)
    message = (
        f"Success! Audio saved to: {result_path}"
        if result_path
        else "Failed to generate audio"
    )
    print(message)
|
app.py
CHANGED
|
@@ -3,6 +3,7 @@ from ui.components import create_main_demo_ui
|
|
| 3 |
from pipeline_ace_step import ACEStepPipeline
|
| 4 |
from data_sampler import DataSampler
|
| 5 |
import os
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
parser = argparse.ArgumentParser()
|
|
@@ -30,12 +31,83 @@ def main(args):
|
|
| 30 |
torch_compile=args.torch_compile
|
| 31 |
)
|
| 32 |
data_sampler = DataSampler()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
demo = create_main_demo_ui(
|
| 35 |
text2music_process_func=model_demo.__call__,
|
| 36 |
sample_data_func=data_sampler.sample,
|
| 37 |
load_data_func=data_sampler.load_json,
|
| 38 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
demo.queue(default_concurrency_limit=8).launch(
|
| 40 |
server_name=args.server_name,
|
| 41 |
server_port=args.port,
|
|
|
|
| 3 |
from pipeline_ace_step import ACEStepPipeline
|
| 4 |
from data_sampler import DataSampler
|
| 5 |
import os
|
| 6 |
+
import gradio as gr
|
| 7 |
|
| 8 |
|
| 9 |
parser = argparse.ArgumentParser()
|
|
|
|
| 31 |
torch_compile=args.torch_compile
|
| 32 |
)
|
| 33 |
data_sampler = DataSampler()
|
| 34 |
+
|
| 35 |
+
# Create API function for external calls
|
| 36 |
+
def generate_music_api(
    duration: float = 20.0,
    tags: str = "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental",
    lyrics: str = "[instrumental]",
    infer_steps: int = 60,
    guidance_scale: float = 15.0,
):
    """API function to generate music via the closed-over pipeline.

    Args:
        duration: Duration in seconds (default 20)
        tags: Music tags/style description
        lyrics: Lyrics or [instrumental] for no vocals
        infer_steps: Inference steps (default 60)
        guidance_scale: Guidance scale (default 15.0)

    Returns:
        audio_path: Path to generated audio file, or None if the
        pipeline produced no outputs.
    """
    # NOTE(review): `model_demo` is the ACEStepPipeline instance from the
    # enclosing main(); the keyword names below must match its __call__
    # signature exactly -- confirm against pipeline_ace_step.py.
    result = model_demo(
        audio_duration=duration,
        prompt=tags,
        lyrics=lyrics,
        infer_step=infer_steps,
        guidance_scale=guidance_scale,
        # Fixed sampler/guidance configuration for the API path.
        scheduler_type="euler",
        cfg_type="apg",
        omega_scale=10.0,
        manual_seeds=None,  # random seed each call -- output is non-deterministic
        guidance_interval=0.5,
        guidance_interval_decay=0.0,
        min_guidance_scale=3.0,
        use_erg_tag=True,
        use_erg_lyric=False,
        use_erg_diffusion=True,
        oss_steps=None,
        guidance_scale_text=0.0,
        guidance_scale_lyric=0.0,
        # Audio-to-audio conditioning disabled for the plain text API.
        audio2audio_enable=False,
        ref_audio_strength=0.5,
        ref_audio_input=None,
        lora_name_or_path="none"
    )
    # Return the audio file path
    # presumably result is a sequence whose first element is the audio
    # path -- TODO confirm the pipeline's return shape.
    if result and len(result) > 0:
        return result[0]  # Return first audio output
    return None
|
| 84 |
|
| 85 |
demo = create_main_demo_ui(
|
| 86 |
text2music_process_func=model_demo.__call__,
|
| 87 |
sample_data_func=data_sampler.sample,
|
| 88 |
load_data_func=data_sampler.load_json,
|
| 89 |
)
|
| 90 |
+
|
| 91 |
+
# Add API endpoint to the demo
# NOTE(review): `api_open` and `api_name` are not documented attributes of
# gradio.Blocks -- these two assignments are likely no-ops; verify against
# the Gradio version pinned for this space.
demo.api_open = True
demo.api_name = "/generate_music"

# Make the API function available
# Re-entering the Blocks context lets the Interface register its components
# and its "generate" api_name on the existing demo app.
with demo:
    gr.Interface(
        fn=generate_music_api,
        inputs=[
            gr.Number(value=20, label="Duration (seconds)"),
            gr.Textbox(value="edm, synth, bass, 128 bpm, energetic", label="Tags"),
            gr.Textbox(value="[instrumental]", label="Lyrics"),
            gr.Number(value=60, label="Inference Steps"),
            gr.Number(value=15.0, label="Guidance Scale"),
        ],
        outputs=gr.Audio(type="filepath", label="Generated Music"),
        api_name="generate",
        visible=False  # Hide this interface, it's only for API
    )
|
| 110 |
+
|
| 111 |
demo.queue(default_concurrency_limit=8).launch(
|
| 112 |
server_name=args.server_name,
|
| 113 |
server_port=args.port,
|
pipeline_ace_step.py
CHANGED
|
@@ -36,7 +36,7 @@ from apg_guidance import (
|
|
| 36 |
cfg_double_condition_forward,
|
| 37 |
)
|
| 38 |
import torchaudio
|
| 39 |
-
import torio
|
| 40 |
|
| 41 |
|
| 42 |
torch.backends.cudnn.benchmark = False
|
|
@@ -1428,12 +1428,13 @@ class ACEStepPipeline:
|
|
| 1428 |
f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
| 1429 |
)
|
| 1430 |
target_wav = target_wav.float()
|
|
|
|
| 1431 |
torchaudio.save(
|
| 1432 |
output_path_flac,
|
| 1433 |
target_wav,
|
| 1434 |
sample_rate=sample_rate,
|
| 1435 |
format=format,
|
| 1436 |
-
compression
|
| 1437 |
)
|
| 1438 |
return output_path_flac
|
| 1439 |
|
|
|
|
| 36 |
cfg_double_condition_forward,
|
| 37 |
)
|
| 38 |
import torchaudio
|
| 39 |
+
# import torio # Deprecated, removed to fix warning
|
| 40 |
|
| 41 |
|
| 42 |
torch.backends.cudnn.benchmark = False
|
|
|
|
| 1428 |
f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
| 1429 |
)
|
| 1430 |
target_wav = target_wav.float()
|
| 1431 |
+
# Use simple torchaudio.save without deprecated compression parameter
|
| 1432 |
torchaudio.save(
|
| 1433 |
output_path_flac,
|
| 1434 |
target_wav,
|
| 1435 |
sample_rate=sample_rate,
|
| 1436 |
format=format,
|
| 1437 |
+
# compression parameter deprecated, format handles encoding
|
| 1438 |
)
|
| 1439 |
return output_path_flac
|
| 1440 |
|
vibevoice_integration.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Integration code for VibeVoice-PodcastCreator to use ACE-Music-Generator
|
| 3 |
+
|
| 4 |
+
Add this to your VibeVoice space to generate background music on demand.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from gradio_client import Client
|
| 8 |
+
import numpy as np
|
| 9 |
+
from scipy.io import wavfile
|
| 10 |
+
import tempfile
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class MusicGenerator:
    """Client for the ACE-Music-Generator space used in podcast creation.

    All generation happens remotely via the space's "/generate" endpoint.
    If the connection fails at construction time the instance degrades
    gracefully: `connected` is False and every generate_* method returns
    None instead of raising.
    """

    def __init__(self, space_name="ACloudCenter/ACE-Music-Generator"):
        """Initialize connection to music generator space."""
        try:
            self.client = Client(space_name)
            self.connected = True
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not swallowed.
        except Exception:
            print("Warning: Could not connect to music generator. Music features disabled.")
            self.connected = False

    def generate_intro_music(self, duration=10):
        """Generate intro music for podcast; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="podcast intro, upbeat, electronic, professional, 120 bpm, energetic",
            lyrics="[instrumental]"
        )

    def generate_outro_music(self, duration=10):
        """Generate outro music for podcast; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="podcast outro, calm, ambient, soft, 80 bpm, fade out, peaceful",
            lyrics="[instrumental]"
        )

    def generate_background_music(self, style="ambient", duration=30):
        """Generate background music for podcast segments.

        Styles:
            - ambient: Soft background music
            - news: Professional news-style background
            - dramatic: Intense, dramatic music
            - tech: Futuristic tech music
            - chill: Relaxed lofi music

        Unknown styles fall back to "ambient".
        """
        styles = {
            "ambient": "ambient, soft, background, minimal, 70 bpm, atmospheric",
            "news": "news, professional, subtle, electronic, 90 bpm, serious",
            "dramatic": "dramatic, orchestral, cinematic, 100 bpm, intense",
            "tech": "electronic, futuristic, synth, 110 bpm, innovative",
            "chill": "lofi, relaxed, warm, 75 bpm, cozy, mellow"
        }

        if not self.connected:
            return None

        tags = styles.get(style, styles["ambient"])
        return self._generate(duration=duration, tags=tags, lyrics="[instrumental]")

    def generate_commercial_jingle(self, duration=5):
        """Generate a short commercial jingle; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="jingle, commercial, catchy, upbeat, 140 bpm, happy, memorable",
            lyrics="[instrumental]"
        )

    def _generate(self, duration, tags, lyrics):
        """Internal method: call the remote "/generate" endpoint."""
        try:
            result = self.client.predict(
                duration,
                tags,
                lyrics,
                60,    # infer_steps
                15.0,  # guidance_scale
                api_name="/generate"
            )
            return result
        except Exception as e:
            print(f"Error generating music: {e}")
            return None

    def mix_with_podcast(self, podcast_audio_path, music_path, music_volume=0.2):
        """Mix background music with podcast audio.

        Args:
            podcast_audio_path: Path to podcast audio file (WAV, int16 assumed).
            music_path: Path to music file (WAV).
            music_volume: Volume of music (0-1, lower = quieter background).

        Returns:
            Path to mixed audio file, or the original podcast path if
            mixing fails.
        """
        try:
            # Load audio files
            podcast_rate, podcast_data = wavfile.read(podcast_audio_path)
            music_rate, music_data = wavfile.read(music_path)

            # Ensure same sample rate
            if podcast_rate != music_rate:
                # Simple linear resampling (librosa would give better quality).
                music_data = np.interp(
                    np.linspace(0, len(music_data), int(len(music_data) * podcast_rate / music_rate)),
                    np.arange(len(music_data)),
                    music_data
                )

            # Loop music if it's shorter than the podcast...
            if len(music_data) < len(podcast_data):
                music_data = np.tile(music_data, (len(podcast_data) // len(music_data) + 1))
            # ...and ALWAYS trim to the podcast length. (Bug fix: the old
            # code only trimmed in the shorter branch, so music longer than
            # the podcast caused a shape-mismatch error and mixing failed.)
            music_data = music_data[:len(podcast_data)]

            # Mix audio, then clip to the int16 range to prevent wrap-around.
            mixed = podcast_data + (music_data * music_volume)
            mixed = np.clip(mixed, -32768, 32767).astype(np.int16)

            # tempfile.mktemp is deprecated and racy (name chosen before the
            # file exists); create the file atomically instead.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                output_path = tmp.name
            wavfile.write(output_path, podcast_rate, mixed)

            return output_path

        except Exception as e:
            print(f"Error mixing audio: {e}")
            return podcast_audio_path  # Return original if mixing fails
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# Example usage in VibeVoice generator
|
| 152 |
+
def enhance_podcast_with_music(podcast_generator):
    """Example of wiring music generation into an existing podcast pipeline."""
    generator = MusicGenerator()

    # Your existing podcast generation code would run here, e.g.
    # podcast_audio = podcast_generator.generate_podcast(...)

    # Produce the three standard segments (intro, background bed, outro).
    segments = {
        "intro": generator.generate_intro_music(duration=5),
        "background": generator.generate_background_music(
            style="ambient",
            duration=60  # Adjust based on your podcast length
        ),
        "outro": generator.generate_outro_music(duration=5),
    }

    # Optionally mix the background bed under the podcast:
    # if segments["background"] and podcast_audio:
    #     mixed_audio = generator.mix_with_podcast(
    #         podcast_audio,
    #         segments["background"],
    #         music_volume=0.1  # Keep it quiet in background
    #     )

    return segments
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
# Quick function to add to your VibeVoice app.py
|
| 191 |
+
def add_music_generation_to_vibevoice():
    """
    Add this to your VibeVoice app.py to integrate music generation
    """

    # The triple-quoted block below is intentionally a bare string literal
    # (a no-op at runtime) used as a copy-paste template, not executable code.
    # In your create_demo() function, add:
    """
    # Add music generator
    music_gen = MusicGenerator()

    # Add checkbox for music generation
    with gr.Row():
        add_intro_music = gr.Checkbox(label="Add Intro Music", value=False)
        add_outro_music = gr.Checkbox(label="Add Outro Music", value=False)
        add_background_music = gr.Checkbox(label="Add Background Music", value=False)
        background_style = gr.Dropdown(
            choices=["ambient", "news", "dramatic", "tech", "chill"],
            value="ambient",
            label="Background Music Style"
        )

    # In your generation function:
    def generate_with_music(..., add_intro, add_outro, add_background, bg_style):
        # Your existing generation code
        podcast_audio = generate_podcast(...)

        # Add music if requested
        if add_intro:
            intro = music_gen.generate_intro_music(5)
            # Concatenate intro with podcast

        if add_background:
            bg_music = music_gen.generate_background_music(bg_style, duration=60)
            # Mix with podcast audio

        if add_outro:
            outro = music_gen.generate_outro_music(5)
            # Concatenate outro

        return final_audio
    """
    # Documentation-only helper: nothing to execute.
    pass
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
if __name__ == "__main__":
    # Smoke-test against the live space (requires network access).
    print("Testing music generator...")
    generator = MusicGenerator()

    print("Generating intro music...")
    intro_path = generator.generate_intro_music(duration=5)
    if intro_path:
        print(f"Intro music saved to: {intro_path}")

    print("Generating background music...")
    background_path = generator.generate_background_music(style="ambient", duration=10)
    if background_path:
        print(f"Background music saved to: {background_path}")

    print("Done!")
|