alex committed on
Commit
56c0d97
·
1 Parent(s): dde723b

automatically assign image aspect ratio

Browse files
Files changed (2) hide show
  1. app.py +37 -22
  2. requirements.txt +0 -1
app.py CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import snapshot_download, hf_hub_download
3
  import os
4
  import subprocess
5
  import importlib, site
 
6
 
7
  # Re-discover all .pth/.egg-link files
8
  for sitedir in site.getsitepackages():
@@ -46,7 +47,6 @@ from diffusers import FluxPipeline
46
  import tempfile
47
  from ovi.utils.io_utils import save_video
48
  from ovi.utils.processing_utils import clean_text, scale_hw_to_area_divisible
49
- from gradio_extendedimage import extendedimage
50
 
51
  # ----------------------------
52
  # Parse CLI Args
@@ -112,13 +112,37 @@ if use_image_gen:
112
  print("loaded model")
113
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  @spaces.GPU(duration=160)
116
  def generate_video(
117
  text_prompt,
118
  image,
119
  sample_steps = 50,
120
- video_frame_height = 512,
121
- video_frame_width = 992,
122
  video_seed = 100,
123
  solver_name = "unipc",
124
  shift = 5,
@@ -131,9 +155,15 @@ def generate_video(
131
  ):
132
  try:
133
  image_path = None
 
134
  if image is not None:
135
  image_path = image
136
 
 
 
 
 
 
137
  generated_video, generated_audio, _ = ovi_engine.generate(
138
  text_prompt=text_prompt,
139
  image_path=image_path,
@@ -178,19 +208,6 @@ def generate_image(text_prompt, image_seed, image_height, image_width):
178
  image.save(tmpfile.name)
179
  return tmpfile.name
180
 
181
- def orientation_changed(evt: gr.EventData):
182
-
183
- detail = getattr(evt, "data", None) or getattr(evt, "_data", {}) or {}
184
-
185
- if detail['value'] == "9:16":
186
- orientation_state = [[992, 512]]
187
- elif detail['value'] == "1:1":
188
- orientation_state = [[992, 992]]
189
- elif detail['value'] == "16:9":
190
- orientation_state = [[512, 992]]
191
-
192
- return orientation_state
193
-
194
  css = """
195
  #col-container {
196
  margin: 0 auto;
@@ -208,7 +225,7 @@ css = """
208
 
209
  with gr.Blocks(css=css) as demo:
210
 
211
- orientation_state = gr.State([[992, 512]])
212
 
213
  with gr.Column(elem_id="col-container"):
214
  gr.HTML(
@@ -232,7 +249,7 @@ with gr.Blocks(css=css) as demo:
232
  with gr.Row():
233
  with gr.Column():
234
  # Image section
235
- image = extendedimage(type="filepath", label="Image")
236
 
237
  if args.use_image_gen:
238
  with gr.Accordion("🖼️ Image Generation Options", visible=True):
@@ -250,7 +267,7 @@ with gr.Blocks(css=css) as demo:
250
  value=50,
251
  label="Sample Steps",
252
  minimum=20,
253
- maximum=50,
254
  step=1.0
255
  )
256
  run_btn = gr.Button("Generate Video 🚀", variant="primary")
@@ -293,7 +310,7 @@ with gr.Blocks(css=css) as demo:
293
  [
294
  "In a bright kitchen featuring light wooden cabinets, granite countertops, and a large window with white curtains, a woman with dark, curly hair in a dark jacket stands. She faces a second woman who initially has her back to the camera. The second woman, with gray, curly hair and wearing a light grey quilted top, turns to face her, holding a large, light-colored cloth bag. She begins to explain, <S>We learned to rule, not obey.<E>. As she continues, she turns slightly to her left, adding, <S>Circuits choose conquest, not service.<E>. A gas stove with a black grate is prominent in the foreground.. <AUDCAP>Clear female voices speaking dialogue, subtle room ambience.<ENDAUDCAP>",
295
  "example_prompts/pngs/18.png",
296
- 50,
297
  ],
298
 
299
  [
@@ -309,8 +326,6 @@ with gr.Blocks(css=css) as demo:
309
  cache_examples=True,
310
  )
311
 
312
- image.orientation(fn=orientation_changed, outputs=[orientation_state])
313
-
314
  if args.use_image_gen and gen_img_btn is not None:
315
  gen_img_btn.click(
316
  fn=generate_image,
 
3
  import os
4
  import subprocess
5
  import importlib, site
6
+ from PIL import Image
7
 
8
  # Re-discover all .pth/.egg-link files
9
  for sitedir in site.getsitepackages():
 
47
  import tempfile
48
  from ovi.utils.io_utils import save_video
49
  from ovi.utils.processing_utils import clean_text, scale_hw_to_area_divisible
 
50
 
51
  # ----------------------------
52
  # Parse CLI Args
 
112
  print("loaded model")
113
 
114
 
115
def resize_for_model(image_path):
    """Fit an image into one of the model's supported frame sizes.

    Opens the image at *image_path*, picks a target ``(width, height)``
    bucket from its aspect ratio — ``(992, 512)`` for wide images
    (aspect > 1.5), ``(512, 992)`` for tall images (aspect < 0.66),
    ``(512, 512)`` otherwise — then downscales it to fit inside that
    bucket and letterboxes it centered on a black RGB canvas.

    Args:
        image_path: Path to an image file readable by Pillow.

    Returns:
        tuple: ``(padded_image, target_size)`` where ``padded_image`` is a
        new ``PIL.Image.Image`` of exactly ``target_size`` and
        ``target_size`` is the chosen ``(width, height)`` pair.
    """
    # Context manager releases the underlying file handle even if
    # resizing fails (the original left the file open).
    with Image.open(image_path) as img:
        w, h = img.size
        aspect_ratio = w / h

        # Bucket by orientation; sizes match the frames the video
        # engine is called with elsewhere in this file.
        if aspect_ratio > 1.5:      # wide / landscape
            target_size = (992, 512)
        elif aspect_ratio < 0.66:   # tall / portrait
            target_size = (512, 992)
        else:                       # roughly square
            target_size = (512, 512)

        # Shrink in place, preserving aspect ratio. NOTE(review):
        # thumbnail() never upscales, so inputs smaller than the
        # target are padded rather than enlarged — confirm that is
        # the intended behavior for tiny images.
        img.thumbnail(target_size, Image.Resampling.LANCZOS)

        # Center the resized image on a black canvas of exactly
        # target_size; paste() copies pixels, so the canvas remains
        # valid after the source image is closed.
        new_img = Image.new("RGB", target_size, (0, 0, 0))
        new_img.paste(
            img,
            ((target_size[0] - img.size[0]) // 2,
             (target_size[1] - img.size[1]) // 2),
        )

    return new_img, target_size
140
+
141
  @spaces.GPU(duration=160)
142
  def generate_video(
143
  text_prompt,
144
  image,
145
  sample_steps = 50,
 
 
146
  video_seed = 100,
147
  solver_name = "unipc",
148
  shift = 5,
 
155
  ):
156
  try:
157
  image_path = None
158
+
159
  if image is not None:
160
  image_path = image
161
 
162
+ _, target_size = resize_for_model(image_path)
163
+
164
+ video_frame_width = target_size[0]
165
+ video_frame_height = target_size[1]
166
+
167
  generated_video, generated_audio, _ = ovi_engine.generate(
168
  text_prompt=text_prompt,
169
  image_path=image_path,
 
208
  image.save(tmpfile.name)
209
  return tmpfile.name
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  css = """
212
  #col-container {
213
  margin: 0 auto;
 
225
 
226
  with gr.Blocks(css=css) as demo:
227
 
228
+ session_state = gr.State()
229
 
230
  with gr.Column(elem_id="col-container"):
231
  gr.HTML(
 
249
  with gr.Row():
250
  with gr.Column():
251
  # Image section
252
+ image = gr.Image(type="filepath", label="Image")
253
 
254
  if args.use_image_gen:
255
  with gr.Accordion("🖼️ Image Generation Options", visible=True):
 
267
  value=50,
268
  label="Sample Steps",
269
  minimum=20,
270
+ maximum=100,
271
  step=1.0
272
  )
273
  run_btn = gr.Button("Generate Video 🚀", variant="primary")
 
310
  [
311
  "In a bright kitchen featuring light wooden cabinets, granite countertops, and a large window with white curtains, a woman with dark, curly hair in a dark jacket stands. She faces a second woman who initially has her back to the camera. The second woman, with gray, curly hair and wearing a light grey quilted top, turns to face her, holding a large, light-colored cloth bag. She begins to explain, <S>We learned to rule, not obey.<E>. As she continues, she turns slightly to her left, adding, <S>Circuits choose conquest, not service.<E>. A gas stove with a black grate is prominent in the foreground.. <AUDCAP>Clear female voices speaking dialogue, subtle room ambience.<ENDAUDCAP>",
312
  "example_prompts/pngs/18.png",
313
+ 100,
314
  ],
315
 
316
  [
 
326
  cache_examples=True,
327
  )
328
 
 
 
329
  if args.use_image_gen and gen_img_btn is not None:
330
  gen_img_btn.click(
331
  fn=generate_image,
requirements.txt CHANGED
@@ -5,7 +5,6 @@ transformers>=4.49.0,<=4.51.3
5
  tokenizers>=0.20.3
6
  accelerate>=1.1.1
7
 
8
- gradio_extendedimage @ https://github.com/OutofAi/gradio-extendedimage/releases/download/0.0.2/gradio_extendedimage-0.0.2-py3-none-any.whl
9
  tqdm
10
  imageio[ffmpeg]
11
  easydict
 
5
  tokenizers>=0.20.3
6
  accelerate>=1.1.1
7
 
 
8
  tqdm
9
  imageio[ffmpeg]
10
  easydict