Spaces:
Runtime error
Runtime error
MCP ready
Browse files
app.py
CHANGED
|
@@ -102,6 +102,31 @@ def merge_audio_to_video(input_vid, input_aud):
|
|
| 102 |
|
| 103 |
@spaces.GPU(duration=100)
|
| 104 |
def infer(video_in):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
# check if 'outputs' dir exists and empty it if necessary
|
| 107 |
check_outputs_folder('./outputs/tmp')
|
|
@@ -223,6 +248,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 223 |
fn = infer,
|
| 224 |
inputs = [video_in],
|
| 225 |
outputs = [output_sound, output_spectrogram, merged_out],
|
| 226 |
-
show_api =
|
| 227 |
)
|
| 228 |
-
demo.launch(show_api=
|
|
|
|
| 102 |
|
| 103 |
@spaces.GPU(duration=100)
|
| 104 |
def infer(video_in):
|
| 105 |
+
"""Generate an audio track from a silent video using a pre-trained VTA (Video-to-Audio) model.
|
| 106 |
+
|
| 107 |
+
This function performs the following steps:
|
| 108 |
+
1. Ensures the output directory is clean.
|
| 109 |
+
2. Trims the video to a maximum of 10 seconds if it is longer.
|
| 110 |
+
3. Runs inference using a pre-trained latent diffusion model to generate audio.
|
| 111 |
+
4. Finds the generated WAV audio output.
|
| 112 |
+
5. Plots a spectrogram of the generated audio.
|
| 113 |
+
6. Merges the audio back into the input video.
|
| 114 |
+
|
| 115 |
+
Args:
|
| 116 |
+
video_in (str): The file path to the input silent video (MP4 format). If the video is longer than 10 seconds, it will be trimmed.
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
Tuple[str, str, str]:
|
| 120 |
+
- The path to the generated `.wav` audio file.
|
| 121 |
+
- The path to the generated spectrogram `.png` image.
|
| 122 |
+
- The path to the final `.mp4` video with the generated audio merged in.
|
| 123 |
+
|
| 124 |
+
Example:
|
| 125 |
+
Given a silent video of a lion, this function will return:
|
| 126 |
+
- A realistic generated audio track simulating the lion's sound,
|
| 127 |
+
- A visual spectrogram representation of the audio,
|
| 128 |
+
- And a new video file where the generated audio is synced to the original visuals.
|
| 129 |
+
"""
|
| 130 |
|
| 131 |
# check if 'outputs' dir exists and empty it if necessary
|
| 132 |
check_outputs_folder('./outputs/tmp')
|
|
|
|
| 248 |
fn = infer,
|
| 249 |
inputs = [video_in],
|
| 250 |
outputs = [output_sound, output_spectrogram, merged_out],
|
| 251 |
+
show_api = True
|
| 252 |
)
|
| 253 |
+
demo.launch(show_api=True, show_error=True, ssr_mode=False, mcp_server=True)
|