Update app.py
app.py CHANGED
@@ -10,8 +10,16 @@ from audioldm.variational_autoencoder import AutoencoderKL
 from gradio import Markdown
 import spaces

+# Automatic device detection
+if torch.cuda.is_available():
+    device_type = "cuda"
+    device_selection = "cuda:0"
+else:
+    device_type = "cpu"
+    device_selection = "cpu"
+
 class Tango:
-    def __init__(self, name="declare-lab/tango
+    def __init__(self, name="declare-lab/tango", device=device_selection):

         path = snapshot_download(repo_id=name)

@@ -70,9 +78,9 @@ class Tango:
 # Initialize TANGO

 tango = Tango(device="cpu")
-tango.vae.to(
-tango.stft.to(
-tango.model.to(
+tango.vae.to(device_type)
+tango.stft.to(device_type)
+tango.model.to(device_type)

 @spaces.GPU(duration=60)
 def gradio_generate(prompt, steps, guidance):

@@ -102,7 +110,12 @@ def gradio_generate(prompt, steps, guidance):
 # Using this ChatGPT-generated description of the sound, TANGO provides superior results.
 # <p/>
 # """
-description_text = "
+description_text = """
+<p><a href="https://huggingface.co/spaces/declare-lab/tango2/blob/main/app.py?duplicate=true"> <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> For faster inference without waiting in queue, you may duplicate the space and upgrade to a GPU in the settings. <br/><br/>
+Generate audio using Tango2 by providing a text prompt. Tango2 was built from Tango and was trained on <a href="https://huggingface.co/datasets/declare-lab/audio-alpaca">Audio-alpaca</a>
+<br/><br/> This is the demo for Tango2 for text to audio generation: <a href="https://arxiv.org/abs/2404.09956">Read our paper.</a>
+<p/>
+"""
 # Gradio input and output components
 input_text = gr.Textbox(lines=2, label="Prompt")
 output_audio = gr.Audio(label="Generated Audio", type="filepath")

@@ -114,7 +127,7 @@ gr_interface = gr.Interface(
     fn=gradio_generate,
     inputs=[input_text, denoising_steps, guidance_scale],
     outputs=[output_audio],
-    title="
+    title="Tango 2: Aligning Diffusion-based Text-to-Audio Generations through Direct Preference Optimization",
     description=description_text,
     allow_flagging=False,
     examples=[
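For reference, the device-handling change above reduces to the following standalone pattern. This is a minimal sketch, not the Space's actual code: it assumes only that torch is installed, and a throwaway nn.Linear stands in for the Tango vae/stft/model components to show what .to(device_type) does.

import torch

# Same detection logic the diff adds at module level: prefer CUDA when available.
if torch.cuda.is_available():
    device_type = "cuda"        # coarse device string used for .to(...)
    device_selection = "cuda:0" # explicit device passed to Tango(device=...)
else:
    device_type = "cpu"
    device_selection = "cpu"

# Hypothetical stand-in module; the real app moves tango.vae, tango.stft and
# tango.model the same way after constructing Tango on the CPU.
stand_in = torch.nn.Linear(4, 4)
stand_in.to(device_type)
print(device_selection, next(stand_in.parameters()).device)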
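The @spaces.GPU(duration=60) decorator kept in the diff is the Hugging Face ZeroGPU pattern: the decorated function is allocated a GPU only while it runs, with duration hinting how long each call may hold the device. The sketch below shows only that usage; generate is a hypothetical handler, not the Space's gradio_generate, and outside a ZeroGPU Space the decorator should simply pass the function through.

import spaces
import torch

@spaces.GPU(duration=60)  # request a ZeroGPU device for up to ~60 s per call
def generate(prompt: str) -> str:
    # GPU-bound work would happen here; the real handler runs Tango inference
    # and returns a filepath to the generated audio.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"would synthesize audio for {prompt!r} on {device}"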