Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -5,37 +5,35 @@ import gradio as gr
 from dotenv import load_dotenv
 import torch
 from scipy.io.wavfile import write
-from diffusers import DiffusionPipeline
+from diffusers import DiffusionPipeline
 from transformers import pipeline
 from pathlib import Path
 
 load_dotenv()
 hf_token = os.getenv("HF_TKN")
 
-
+device_id = 0 if torch.cuda.is_available() else -1
+
 captioning_pipeline = pipeline(
     "image-to-text",
-    model="nlpconnect/vit-gpt2-image-captioning"
+    model="nlpconnect/vit-gpt2-image-captioning",
+    device=device_id
 )
 
 pipe = DiffusionPipeline.from_pretrained(
     "cvssp/audioldm2",
     use_auth_token=hf_token
 )
+# The AudioLDM pipeline can be moved to CUDA/CPU explicitly inside the function.
 
 @spaces.GPU(duration=120)
 def analyze_image_with_free_model(image_file):
     try:
-        # Move captioning pipeline to GPU
-        captioning_pipeline.to("cuda")
         with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
             temp_file.write(image_file)
             temp_image_path = temp_file.name
 
         results = captioning_pipeline(temp_image_path)
-        # Move back to CPU (optional)
-        captioning_pipeline.to("cpu")
-
         if not results or not isinstance(results, list):
             return "Error: Could not generate caption.", True
 
@@ -50,7 +48,6 @@ def analyze_image_with_free_model(image_file):
 @spaces.GPU(duration=120)
 def get_audioldm_from_caption(caption):
     try:
-        # Move AudioLDM pipeline to GPU
         pipe.to("cuda")
         audio_output = pipe(
             prompt=caption,
@@ -61,7 +58,7 @@ def get_audioldm_from_caption(caption):
         audio = audio_output.audios[0]
 
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
-            write(temp_wav.name, 16000, audio)
+            write(temp_wav.name, 16000, audio)
             return temp_wav.name
 
     except Exception as e:
@@ -78,12 +75,10 @@ css = """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.HTML("""
-        <h1 style="text-align: center;">
-            🎶 Generate Sound Effects from Image
-        </h1>
-        <p style="text-align: center;">
-        ⚡ Powered by <a href="https://bilsimaging.com" target="_blank">Bilsimaging</a>
-        </p>
+        <h1 style="text-align: center;">🎶 Generate Sound Effects from Image</h1>
+        <p style="text-align: center;">
+        ⚡ Powered by <a href="https://bilsimaging.com" target="_blank">Bilsimaging</a>
+        </p>
         """)
 
         gr.Markdown("""
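
Note: as a reading aid, here is a minimal standalone sketch of the device-placement pattern this update adopts. The captioning pipeline now picks its device once at load time through transformers' device argument, while the AudioLDM pipeline stays on CPU until a @spaces.GPU-decorated call moves it to CUDA. The model names, the HF_TKN variable, and the decorator come from the diff above; the caption_then_sound helper and the "generated_text" indexing are illustrative assumptions, not part of the commit.

import os

import torch
import spaces  # ZeroGPU decorator available on Hugging Face Spaces
from transformers import pipeline
from diffusers import DiffusionPipeline

# transformers convention: device=0 is the first CUDA device, device=-1 is CPU.
device_id = 0 if torch.cuda.is_available() else -1

captioning_pipeline = pipeline(
    "image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
    device=device_id,
)

# The diffusers pipeline is loaded on CPU at import time and only moved to
# CUDA inside a @spaces.GPU function, where a GPU is actually attached.
pipe = DiffusionPipeline.from_pretrained(
    "cvssp/audioldm2",
    use_auth_token=os.getenv("HF_TKN"),
)

@spaces.GPU(duration=120)
def caption_then_sound(image_path):
    """Hypothetical helper: caption an image, then synthesize matching audio."""
    caption = captioning_pipeline(image_path)[0]["generated_text"]
    pipe.to("cuda")  # the GPU is attached for the duration of this call
    audio = pipe(prompt=caption).audios[0]  # 16 kHz waveform as a NumPy array
    return caption, audio

This replaces the per-call .to("cuda") / .to("cpu") moves that the commit removes from analyze_image_with_free_model.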