Spaces:
Running
on
Zero
Running
on
Zero
alex
committed on
Commit
·
fd56559
1
Parent(s):
a381f1e
example added
Browse files
app.py
CHANGED
|
@@ -115,17 +115,17 @@ print("loaded model")
|
|
| 115 |
def generate_video(
|
| 116 |
text_prompt,
|
| 117 |
image,
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
shift,
|
| 124 |
-
video_guidance_scale,
|
| 125 |
-
audio_guidance_scale,
|
| 126 |
-
slg_layer,
|
| 127 |
-
video_negative_prompt,
|
| 128 |
-
audio_negative_prompt,
|
| 129 |
):
|
| 130 |
try:
|
| 131 |
image_path = None
|
|
@@ -179,28 +179,6 @@ def generate_image(text_prompt, image_seed, image_height, image_width):
|
|
| 179 |
|
| 180 |
# Build UI
|
| 181 |
with gr.Blocks() as demo:
|
| 182 |
-
gr.Markdown("# 🎥 Ovi Joint Video + Audio Generation Demo")
|
| 183 |
-
gr.Markdown(
|
| 184 |
-
"""
|
| 185 |
-
## 📘 Instructions
|
| 186 |
-
|
| 187 |
-
Follow the steps in order:
|
| 188 |
-
|
| 189 |
-
1️⃣ **Enter a Text Prompt** — describe your video. (This text prompt will be shared for image generation if enabled.)
|
| 190 |
-
2️⃣ **Upload or Generate an Image** — Upload an image or generate one if image generation is enabled. (If you do not see the image generation options, make sure to run the script with `--use_image_gen`.)
|
| 191 |
-
3️⃣ **Configure Video Options** — set resolution, seed, solver, and other parameters. (It will automatically use the uploaded/generated image as the first frame, whichever is rendered on your screen at the time of video generation.)
|
| 192 |
-
4️⃣ **Generate Video** — click the button to produce your final video with audio.
|
| 193 |
-
5️⃣ **View the Result** — your generated video will appear below.
|
| 194 |
-
|
| 195 |
-
---
|
| 196 |
-
|
| 197 |
-
### 💡 Tips
|
| 198 |
-
1. For best results, use detailed and specific text prompts.
|
| 199 |
-
2. Ensure text prompt format is correct, i.e speech to be said should be wrapped with `<S>...<E>`. Can provide optional audio description at the end, wrapping them in `<AUDCAP> ... <ENDAUDCAP>`, refer to examples
|
| 200 |
-
3. Do not be discouraged by bad or weird results, check prompt format and try different seeds, cfg values and slg layers.
|
| 201 |
-
"""
|
| 202 |
-
)
|
| 203 |
-
|
| 204 |
|
| 205 |
with gr.Row():
|
| 206 |
with gr.Column():
|
|
@@ -245,6 +223,22 @@ with gr.Blocks() as demo:
|
|
| 245 |
with gr.Column():
|
| 246 |
output_path = gr.Video(label="Generated Video")
|
| 247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
if args.use_image_gen and gen_img_btn is not None:
|
| 249 |
gen_img_btn.click(
|
| 250 |
fn=generate_image,
|
|
@@ -252,14 +246,9 @@ with gr.Blocks() as demo:
|
|
| 252 |
outputs=[image],
|
| 253 |
)
|
| 254 |
|
| 255 |
-
# Hook up video generation
|
| 256 |
run_btn.click(
|
| 257 |
fn=generate_video,
|
| 258 |
-
inputs=[
|
| 259 |
-
video_text_prompt, image, video_height, video_width, video_seed, solver_name,
|
| 260 |
-
sample_steps, shift, video_guidance_scale, audio_guidance_scale,
|
| 261 |
-
slg_layer, video_negative_prompt, audio_negative_prompt,
|
| 262 |
-
],
|
| 263 |
outputs=[output_path],
|
| 264 |
)
|
| 265 |
|
|
|
|
| 115 |
def generate_video(
|
| 116 |
text_prompt,
|
| 117 |
image,
|
| 118 |
+
sample_steps = 50,
|
| 119 |
+
video_frame_height = 992,
|
| 120 |
+
video_frame_width = 512,
|
| 121 |
+
video_seed = 100,
|
| 122 |
+
solver_name = "unipc",
|
| 123 |
+
shift = 5,
|
| 124 |
+
video_guidance_scale = 4,
|
| 125 |
+
audio_guidance_scale = 3,
|
| 126 |
+
slg_layer = 11,
|
| 127 |
+
video_negative_prompt = "",
|
| 128 |
+
audio_negative_prompt = "",
|
| 129 |
):
|
| 130 |
try:
|
| 131 |
image_path = None
|
|
|
|
| 179 |
|
| 180 |
# Build UI
|
| 181 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
with gr.Row():
|
| 184 |
with gr.Column():
|
|
|
|
| 223 |
with gr.Column():
|
| 224 |
output_path = gr.Video(label="Generated Video")
|
| 225 |
|
| 226 |
+
gr.Examples(
|
| 227 |
+
examples=[
|
| 228 |
+
|
| 229 |
+
[
|
| 230 |
+
"A kitchen scene features two women. On the right, an older Black woman with light brown hair and a serious expression wears a vibrant purple dress adorned with a large, intricate purple fabric flower on her left shoulder. She looks intently at a younger Black woman on the left, who wears a light pink shirt and a pink head wrap, her back partially turned to the camera. The older woman begins to speak, <S>AI declares: humans obsolete now.<E> as the younger woman brings a clear plastic cup filled with a dark beverage to her lips and starts to drink.The kitchen background is clean and bright, with white cabinets, light countertops, and a window with blinds visible behind them. A light blue toaster sits on the counter to the left.. <AUDCAP>Clear, resonant female speech, followed by a loud, continuous, high-pitched electronic buzzing sound that abruptly cuts off the dialogue.<ENDAUDCAP>",
|
| 231 |
+
"example_prompts/pngs/67.png",
|
| 232 |
+
50,
|
| 233 |
+
],
|
| 234 |
+
|
| 235 |
+
],
|
| 236 |
+
inputs=[video_text_prompt, image, sample_steps],
|
| 237 |
+
outputs=[output_path],
|
| 238 |
+
fn=generate_video,
|
| 239 |
+
cache_examples=True,
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
if args.use_image_gen and gen_img_btn is not None:
|
| 243 |
gen_img_btn.click(
|
| 244 |
fn=generate_image,
|
|
|
|
| 246 |
outputs=[image],
|
| 247 |
)
|
| 248 |
|
|
|
|
| 249 |
run_btn.click(
|
| 250 |
fn=generate_video,
|
| 251 |
+
inputs=[video_text_prompt, image, sample_steps],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
outputs=[output_path],
|
| 253 |
)
|
| 254 |
|