Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
009c9f3
1
Parent(s):
50a3943
Add API endpoint and fix deprecation warnings
Browse files- Fixed torchaudio deprecation warning by removing torio import
- Added API endpoint for external calls at /generate
- Created example code for calling from other spaces
- Added VibeVoice integration helper class
- Made music generation accessible via Gradio Client API
- api_usage_example.py +170 -0
- app.py +72 -0
- pipeline_ace_step.py +3 -2
- vibevoice_integration.py +250 -0
api_usage_example.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Example code for calling ACE-Music-Generator from another Hugging Face Space
|
| 3 |
+
|
| 4 |
+
This shows how to use the ACE-Music-Generator API from your podcast space
|
| 5 |
+
or any other Python application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from gradio_client import Client
|
| 9 |
+
import tempfile
|
| 10 |
+
import requests
|
| 11 |
+
|
| 12 |
+
# Method 1: Using Gradio Client (Recommended for Spaces)
|
| 13 |
+
def generate_music_from_space(
    duration=20,
    tags="edm, synth, bass, 128 bpm, energetic",
    lyrics="[instrumental]",
    space_name="ACloudCenter/ACE-Music-Generator"
):
    """Generate a music clip via the ACE-Music-Generator space API.

    Args:
        duration: Length of the clip in seconds.
        tags: Comma-separated style/genre tags describing the music.
        lyrics: Lyric text, or "[instrumental]" for no vocals.
        space_name: Hugging Face space identifier to connect to.

    Returns:
        Path to the downloaded audio file, or None if generation failed.
    """
    try:
        space_client = Client(space_name)
        # The space exposes its generator under the "/generate" endpoint;
        # positional args mirror the Interface inputs defined in app.py.
        generated_path = space_client.predict(
            duration,
            tags,
            lyrics,
            60,    # infer_steps
            15.0,  # guidance_scale
            api_name="/generate"
        )
        return generated_path
    except Exception as e:
        print(f"Error generating music: {e}")
        return None
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Method 2: Direct HTTP API call
|
| 53 |
+
def generate_music_http(
    duration=20,
    tags="edm, synth, bass, 128 bpm, energetic",
    lyrics="[instrumental]",
    space_url="https://acloudcenter-ace-music-generator.hf.space"
):
    """Generate music using a direct HTTP call to the Gradio REST API.

    Args:
        duration: Duration in seconds
        tags: Music style tags
        lyrics: Lyrics or [instrumental]
        space_url: Base URL of the deployed space

    Returns:
        Path to the downloaded audio file, or None on failure.
        The caller is responsible for deleting the temp file.
    """
    api_url = f"{space_url}/run/generate"

    # Gradio's /run/<api_name> endpoint takes positional inputs as "data".
    payload = {
        "data": [
            duration,
            tags,
            lyrics,
            60,    # infer_steps
            15.0,  # guidance_scale
        ]
    }

    try:
        # Generation is slow; an explicit timeout prevents hanging forever
        # if the space is unresponsive (requests has no default timeout).
        response = requests.post(api_url, json=payload, timeout=300)

        if response.status_code == 200:
            result = response.json()
            # NOTE(review): assumes the first output is a file dict with a
            # "url" key -- verify against the space's actual response schema.
            audio_url = result["data"][0]["url"]

            # Save to a temp file so the caller gets a local path.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                audio_response = requests.get(audio_url, timeout=60)
                f.write(audio_response.content)
                return f.name
        else:
            print(f"Error: {response.status_code}")
            return None

    except Exception as e:
        print(f"Error generating music: {e}")
        return None
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Example usage in your podcast generator
|
| 109 |
+
def add_background_music_to_podcast():
    """Example: create a short background track for a podcast space."""
    # Ask the remote space for a calm 20-second instrumental bed.
    music_path = generate_music_from_space(
        duration=20,
        tags="edm, ambient, soft, background, 100 bpm, calm",
        lyrics="[instrumental]"
    )

    if not music_path:
        print("Failed to generate music")
        return None

    print(f"Generated music saved to: {music_path}")
    # The returned path can now be mixed with the podcast audio downstream.
    return music_path
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# Different music styles you can generate.
# Preset tag strings for common podcast use-cases; each value is passed
# straight through as the generator's `tags` parameter.
MUSIC_STYLES = {
    "podcast_intro": "upbeat, electronic, professional, 120 bpm, energetic, modern",
    "podcast_outro": "calm, ambient, soft, 80 bpm, relaxing, fade out",
    "news_background": "minimal, electronic, subtle, 90 bpm, serious, professional",
    "commercial": "pop, upbeat, catchy, 128 bpm, happy, commercial",
    "dramatic": "orchestral, dramatic, cinematic, 100 bpm, intense, emotional",
    "tech": "electronic, futuristic, synth, 110 bpm, innovative, modern",
    "chill": "lofi, relaxed, warm, 75 bpm, cozy, background",
}
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def generate_podcast_music(style="podcast_intro", duration=15):
    """Generate music for a podcast segment using a named preset.

    Args:
        style: Key into MUSIC_STYLES; unknown keys fall back to the
            "podcast_intro" preset.
        duration: Clip length in seconds.

    Returns:
        Path to the generated audio file, or None on failure.
    """
    preset_tags = MUSIC_STYLES.get(style, MUSIC_STYLES["podcast_intro"])
    return generate_music_from_space(
        duration=duration,
        tags=preset_tags,
        lyrics="[instrumental]"
    )
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
if __name__ == "__main__":
    # Smoke-test the remote API with a short intro clip (needs network).
    print("Generating test music...")
    result_path = generate_podcast_music(style="podcast_intro", duration=10)
    message = (
        f"Success! Audio saved to: {result_path}"
        if result_path
        else "Failed to generate audio"
    )
    print(message)
|
app.py
CHANGED
|
@@ -3,6 +3,7 @@ from ui.components import create_main_demo_ui
|
|
| 3 |
from pipeline_ace_step import ACEStepPipeline
|
| 4 |
from data_sampler import DataSampler
|
| 5 |
import os
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
parser = argparse.ArgumentParser()
|
|
@@ -30,12 +31,83 @@ def main(args):
|
|
| 30 |
torch_compile=args.torch_compile
|
| 31 |
)
|
| 32 |
data_sampler = DataSampler()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
demo = create_main_demo_ui(
|
| 35 |
text2music_process_func=model_demo.__call__,
|
| 36 |
sample_data_func=data_sampler.sample,
|
| 37 |
load_data_func=data_sampler.load_json,
|
| 38 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
demo.queue(default_concurrency_limit=8).launch(
|
| 40 |
server_name=args.server_name,
|
| 41 |
server_port=args.port,
|
|
|
|
| 3 |
from pipeline_ace_step import ACEStepPipeline
|
| 4 |
from data_sampler import DataSampler
|
| 5 |
import os
|
| 6 |
+
import gradio as gr
|
| 7 |
|
| 8 |
|
| 9 |
parser = argparse.ArgumentParser()
|
|
|
|
| 31 |
torch_compile=args.torch_compile
|
| 32 |
)
|
| 33 |
data_sampler = DataSampler()
|
| 34 |
+
|
| 35 |
+
# Create API function for external calls
|
| 36 |
+
def generate_music_api(
    duration: float = 20.0,
    tags: str = "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental",
    lyrics: str = "[instrumental]",
    infer_steps: int = 60,
    guidance_scale: float = 15.0,
):
    """API function to generate music via the closed-over pipeline.

    Args:
        duration: Duration in seconds (default 20)
        tags: Music tags/style description
        lyrics: Lyrics or [instrumental] for no vocals
        infer_steps: Inference steps (default 60)
        guidance_scale: Guidance scale (default 15.0)

    Returns:
        audio_path: Path to generated audio file, or None if the
        pipeline produced no outputs.
    """
    # NOTE(review): `model_demo` is the ACEStepPipeline instance from the
    # enclosing main(); the keyword names below must match its __call__
    # signature exactly -- confirm against pipeline_ace_step.py.
    result = model_demo(
        audio_duration=duration,
        prompt=tags,
        lyrics=lyrics,
        infer_step=infer_steps,
        guidance_scale=guidance_scale,
        # Fixed sampler/guidance configuration for the API path.
        scheduler_type="euler",
        cfg_type="apg",
        omega_scale=10.0,
        manual_seeds=None,  # random seed each call -- output is non-deterministic
        guidance_interval=0.5,
        guidance_interval_decay=0.0,
        min_guidance_scale=3.0,
        use_erg_tag=True,
        use_erg_lyric=False,
        use_erg_diffusion=True,
        oss_steps=None,
        guidance_scale_text=0.0,
        guidance_scale_lyric=0.0,
        # Audio-to-audio conditioning disabled for the plain text API.
        audio2audio_enable=False,
        ref_audio_strength=0.5,
        ref_audio_input=None,
        lora_name_or_path="none"
    )
    # Return the audio file path
    # presumably result is a sequence whose first element is the audio
    # path -- TODO confirm the pipeline's return shape.
    if result and len(result) > 0:
        return result[0]  # Return first audio output
    return None
|
| 84 |
|
| 85 |
demo = create_main_demo_ui(
|
| 86 |
text2music_process_func=model_demo.__call__,
|
| 87 |
sample_data_func=data_sampler.sample,
|
| 88 |
load_data_func=data_sampler.load_json,
|
| 89 |
)
|
| 90 |
+
|
| 91 |
+
# Add API endpoint to the demo
# NOTE(review): `api_open` and `api_name` are not documented attributes of
# gradio.Blocks -- these two assignments are likely no-ops; verify against
# the Gradio version pinned for this space.
demo.api_open = True
demo.api_name = "/generate_music"

# Make the API function available
# Re-entering the Blocks context lets the Interface register its components
# and its "generate" api_name on the existing demo app.
with demo:
    gr.Interface(
        fn=generate_music_api,
        inputs=[
            gr.Number(value=20, label="Duration (seconds)"),
            gr.Textbox(value="edm, synth, bass, 128 bpm, energetic", label="Tags"),
            gr.Textbox(value="[instrumental]", label="Lyrics"),
            gr.Number(value=60, label="Inference Steps"),
            gr.Number(value=15.0, label="Guidance Scale"),
        ],
        outputs=gr.Audio(type="filepath", label="Generated Music"),
        api_name="generate",
        visible=False  # Hide this interface, it's only for API
    )
|
| 110 |
+
|
| 111 |
demo.queue(default_concurrency_limit=8).launch(
|
| 112 |
server_name=args.server_name,
|
| 113 |
server_port=args.port,
|
pipeline_ace_step.py
CHANGED
|
@@ -36,7 +36,7 @@ from apg_guidance import (
|
|
| 36 |
cfg_double_condition_forward,
|
| 37 |
)
|
| 38 |
import torchaudio
|
| 39 |
-
import torio
|
| 40 |
|
| 41 |
|
| 42 |
torch.backends.cudnn.benchmark = False
|
|
@@ -1428,12 +1428,13 @@ class ACEStepPipeline:
|
|
| 1428 |
f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
| 1429 |
)
|
| 1430 |
target_wav = target_wav.float()
|
|
|
|
| 1431 |
torchaudio.save(
|
| 1432 |
output_path_flac,
|
| 1433 |
target_wav,
|
| 1434 |
sample_rate=sample_rate,
|
| 1435 |
format=format,
|
| 1436 |
-
compression
|
| 1437 |
)
|
| 1438 |
return output_path_flac
|
| 1439 |
|
|
|
|
| 36 |
cfg_double_condition_forward,
|
| 37 |
)
|
| 38 |
import torchaudio
|
| 39 |
+
# import torio # Deprecated, removed to fix warning
|
| 40 |
|
| 41 |
|
| 42 |
torch.backends.cudnn.benchmark = False
|
|
|
|
| 1428 |
f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
| 1429 |
)
|
| 1430 |
target_wav = target_wav.float()
|
| 1431 |
+
# Use simple torchaudio.save without deprecated compression parameter
|
| 1432 |
torchaudio.save(
|
| 1433 |
output_path_flac,
|
| 1434 |
target_wav,
|
| 1435 |
sample_rate=sample_rate,
|
| 1436 |
format=format,
|
| 1437 |
+
# compression parameter deprecated, format handles encoding
|
| 1438 |
)
|
| 1439 |
return output_path_flac
|
| 1440 |
|
vibevoice_integration.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Integration code for VibeVoice-PodcastCreator to use ACE-Music-Generator
|
| 3 |
+
|
| 4 |
+
Add this to your VibeVoice space to generate background music on demand.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from gradio_client import Client
|
| 8 |
+
import numpy as np
|
| 9 |
+
from scipy.io import wavfile
|
| 10 |
+
import tempfile
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class MusicGenerator:
    """Client for the ACE-Music-Generator space used in podcast creation.

    All generation happens remotely via the space's "/generate" endpoint.
    If the connection fails at construction time the instance degrades
    gracefully: `connected` is False and every generate_* method returns
    None instead of raising.
    """

    def __init__(self, space_name="ACloudCenter/ACE-Music-Generator"):
        """Initialize connection to music generator space."""
        try:
            self.client = Client(space_name)
            self.connected = True
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not swallowed.
        except Exception:
            print("Warning: Could not connect to music generator. Music features disabled.")
            self.connected = False

    def generate_intro_music(self, duration=10):
        """Generate intro music for podcast; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="podcast intro, upbeat, electronic, professional, 120 bpm, energetic",
            lyrics="[instrumental]"
        )

    def generate_outro_music(self, duration=10):
        """Generate outro music for podcast; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="podcast outro, calm, ambient, soft, 80 bpm, fade out, peaceful",
            lyrics="[instrumental]"
        )

    def generate_background_music(self, style="ambient", duration=30):
        """Generate background music for podcast segments.

        Styles:
            - ambient: Soft background music
            - news: Professional news-style background
            - dramatic: Intense, dramatic music
            - tech: Futuristic tech music
            - chill: Relaxed lofi music

        Unknown styles fall back to "ambient".
        """
        styles = {
            "ambient": "ambient, soft, background, minimal, 70 bpm, atmospheric",
            "news": "news, professional, subtle, electronic, 90 bpm, serious",
            "dramatic": "dramatic, orchestral, cinematic, 100 bpm, intense",
            "tech": "electronic, futuristic, synth, 110 bpm, innovative",
            "chill": "lofi, relaxed, warm, 75 bpm, cozy, mellow"
        }

        if not self.connected:
            return None

        tags = styles.get(style, styles["ambient"])
        return self._generate(duration=duration, tags=tags, lyrics="[instrumental]")

    def generate_commercial_jingle(self, duration=5):
        """Generate a short commercial jingle; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="jingle, commercial, catchy, upbeat, 140 bpm, happy, memorable",
            lyrics="[instrumental]"
        )

    def _generate(self, duration, tags, lyrics):
        """Internal method: call the remote "/generate" endpoint."""
        try:
            result = self.client.predict(
                duration,
                tags,
                lyrics,
                60,    # infer_steps
                15.0,  # guidance_scale
                api_name="/generate"
            )
            return result
        except Exception as e:
            print(f"Error generating music: {e}")
            return None

    def mix_with_podcast(self, podcast_audio_path, music_path, music_volume=0.2):
        """Mix background music with podcast audio.

        Args:
            podcast_audio_path: Path to podcast audio file (WAV, int16 assumed).
            music_path: Path to music file (WAV).
            music_volume: Volume of music (0-1, lower = quieter background).

        Returns:
            Path to mixed audio file, or the original podcast path if
            mixing fails.
        """
        try:
            # Load audio files
            podcast_rate, podcast_data = wavfile.read(podcast_audio_path)
            music_rate, music_data = wavfile.read(music_path)

            # Ensure same sample rate
            if podcast_rate != music_rate:
                # Simple linear resampling (librosa would give better quality).
                music_data = np.interp(
                    np.linspace(0, len(music_data), int(len(music_data) * podcast_rate / music_rate)),
                    np.arange(len(music_data)),
                    music_data
                )

            # Loop music if it's shorter than the podcast...
            if len(music_data) < len(podcast_data):
                music_data = np.tile(music_data, (len(podcast_data) // len(music_data) + 1))
            # ...and ALWAYS trim to the podcast length. (Bug fix: the old
            # code only trimmed in the shorter branch, so music longer than
            # the podcast caused a shape-mismatch error and mixing failed.)
            music_data = music_data[:len(podcast_data)]

            # Mix audio, then clip to the int16 range to prevent wrap-around.
            mixed = podcast_data + (music_data * music_volume)
            mixed = np.clip(mixed, -32768, 32767).astype(np.int16)

            # tempfile.mktemp is deprecated and racy (name chosen before the
            # file exists); create the file atomically instead.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                output_path = tmp.name
            wavfile.write(output_path, podcast_rate, mixed)

            return output_path

        except Exception as e:
            print(f"Error mixing audio: {e}")
            return podcast_audio_path  # Return original if mixing fails
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# Example usage in VibeVoice generator
|
| 152 |
+
def enhance_podcast_with_music(podcast_generator):
    """Example of wiring music generation into an existing podcast pipeline."""
    generator = MusicGenerator()

    # Your existing podcast generation code would run here, e.g.
    # podcast_audio = podcast_generator.generate_podcast(...)

    # Produce the three standard segments (intro, background bed, outro).
    segments = {
        "intro": generator.generate_intro_music(duration=5),
        "background": generator.generate_background_music(
            style="ambient",
            duration=60  # Adjust based on your podcast length
        ),
        "outro": generator.generate_outro_music(duration=5),
    }

    # Optionally mix the background bed under the podcast:
    # if segments["background"] and podcast_audio:
    #     mixed_audio = generator.mix_with_podcast(
    #         podcast_audio,
    #         segments["background"],
    #         music_volume=0.1  # Keep it quiet in background
    #     )

    return segments
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
# Quick function to add to your VibeVoice app.py
|
| 191 |
+
def add_music_generation_to_vibevoice():
    """
    Add this to your VibeVoice app.py to integrate music generation
    """

    # The triple-quoted block below is intentionally a bare string literal
    # (a no-op at runtime) used as a copy-paste template, not executable code.
    # In your create_demo() function, add:
    """
    # Add music generator
    music_gen = MusicGenerator()

    # Add checkbox for music generation
    with gr.Row():
        add_intro_music = gr.Checkbox(label="Add Intro Music", value=False)
        add_outro_music = gr.Checkbox(label="Add Outro Music", value=False)
        add_background_music = gr.Checkbox(label="Add Background Music", value=False)
        background_style = gr.Dropdown(
            choices=["ambient", "news", "dramatic", "tech", "chill"],
            value="ambient",
            label="Background Music Style"
        )

    # In your generation function:
    def generate_with_music(..., add_intro, add_outro, add_background, bg_style):
        # Your existing generation code
        podcast_audio = generate_podcast(...)

        # Add music if requested
        if add_intro:
            intro = music_gen.generate_intro_music(5)
            # Concatenate intro with podcast

        if add_background:
            bg_music = music_gen.generate_background_music(bg_style, duration=60)
            # Mix with podcast audio

        if add_outro:
            outro = music_gen.generate_outro_music(5)
            # Concatenate outro

        return final_audio
    """
    # Documentation-only helper: nothing to execute.
    pass
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
if __name__ == "__main__":
    # Smoke-test against the live space (requires network access).
    print("Testing music generator...")
    generator = MusicGenerator()

    print("Generating intro music...")
    intro_path = generator.generate_intro_music(duration=5)
    if intro_path:
        print(f"Intro music saved to: {intro_path}")

    print("Generating background music...")
    background_path = generator.generate_background_music(style="ambient", duration=10)
    if background_path:
        print(f"Background music saved to: {background_path}")

    print("Done!")
|