OVI

Running on Zero

App Files Community

alex committed on Oct 3

Commit

fd56559

1 Parent(s): a381f1e

example added

Browse files

Files changed (1) hide show

app.py +28 -39

app.py CHANGED Viewed

@@ -115,17 +115,17 @@ print("loaded model")
 def generate_video(
     text_prompt,
     image,
-    video_frame_height,
-    video_frame_width,
-    video_seed,
-    solver_name,
-    sample_steps,
-    shift,
-    video_guidance_scale,
-    audio_guidance_scale,
-    slg_layer,
-    video_negative_prompt,
-    audio_negative_prompt,
 ):
     try:
         image_path = None
@@ -179,28 +179,6 @@ def generate_image(text_prompt, image_seed, image_height, image_width):
 # Build UI
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎥 Ovi Joint Video + Audio Generation Demo")
-    gr.Markdown(
-        """
-        ## 📘 Instructions
-        Follow the steps in order:
-        1️⃣ **Enter a Text Prompt** — describe your video. (This text prompt will be shared for image generation if enabled.)
-        2️⃣ **Upload or Generate an Image** — Upload an image or generate one if image generation is enabled.  (If you do not see the image generation options, make sure to run the script with `--use_image_gen`.)
-        3️⃣ **Configure Video Options** — set resolution, seed, solver, and other parameters. (It will automatically use the uploaded/generated image as the first frame, whichever is rendered on your screen at the time of video generation.)
-        4️⃣ **Generate Video** — click the button to produce your final video with audio.
-        5️⃣ **View the Result** — your generated video will appear below.
-        ---
-        ### 💡 Tips
-        1. For best results, use detailed and specific text prompts.
-        2. Ensure text prompt format is correct, i.e speech to be said should be wrapped with `<S>...<E>`. Can provide optional audio description at the end, wrapping them in `<AUDCAP> ... <ENDAUDCAP>`, refer to examples
-        3. Do not be discouraged by bad or weird results, check prompt format and try different seeds, cfg values and slg layers.
-        """
-    )
     with gr.Row():
         with gr.Column():
@@ -245,6 +223,22 @@ with gr.Blocks() as demo:
         with gr.Column():
             output_path = gr.Video(label="Generated Video")
     if args.use_image_gen and gen_img_btn is not None:
         gen_img_btn.click(
             fn=generate_image,
@@ -252,14 +246,9 @@ with gr.Blocks() as demo:
             outputs=[image],
         )
-    # Hook up video generation
     run_btn.click(
         fn=generate_video,
-        inputs=[
-            video_text_prompt, image, video_height, video_width, video_seed, solver_name,
-            sample_steps, shift, video_guidance_scale, audio_guidance_scale,
-            slg_layer, video_negative_prompt, audio_negative_prompt,
-        ],
         outputs=[output_path],
     )

 def generate_video(
     text_prompt,
     image,
+    sample_steps = 50,
+    video_frame_height = 992,
+    video_frame_width = 512,
+    video_seed = 100,
+    solver_name = "unipc",
+    shift = 5,
+    video_guidance_scale = 4,
+    audio_guidance_scale = 3,
+    slg_layer = 11,
+    video_negative_prompt = "",
+    audio_negative_prompt = "",
 ):
     try:
         image_path = None
 # Build UI
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
         with gr.Column():
             output_path = gr.Video(label="Generated Video")
+            gr.Examples(
+                examples=[
+                    [
+                        "A kitchen scene features two women. On the right, an older Black woman with light brown hair and a serious expression wears a vibrant purple dress adorned with a large, intricate purple fabric flower on her left shoulder. She looks intently at a younger Black woman on the left, who wears a light pink shirt and a pink head wrap, her back partially turned to the camera. The older woman begins to speak, <S>AI declares: humans obsolete now.<E> as the younger woman brings a clear plastic cup filled with a dark beverage to her lips and starts to drink.The kitchen background is clean and bright, with white cabinets, light countertops, and a window with blinds visible behind them. A light blue toaster sits on the counter to the left.. <AUDCAP>Clear, resonant female speech, followed by a loud, continuous, high-pitched electronic buzzing sound that abruptly cuts off the dialogue.<ENDAUDCAP>",
+                        "example_prompts/pngs/67.png",
+                        50,
+                    ],
+                ],
+                inputs=[video_text_prompt, image, sample_steps],
+                outputs=[output_path],
+                fn=generate_video,
+                cache_examples=True,
+            )
     if args.use_image_gen and gen_img_btn is not None:
         gen_img_btn.click(
             fn=generate_image,
             outputs=[image],
         )
     run_btn.click(
         fn=generate_video,
+        inputs=[video_text_prompt, image, sample_steps],
         outputs=[output_path],
     )