WAN2-1-fast-T2V-FusioniX

Running on Zero

App Files Files Community

multimodalart HF Staff committed on May 20

Commit

1e531a7

verified ·

1 Parent(s): 18b8c31

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -15

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from huggingface_hub import hf_hub_download
 import logging
 import numpy as np
 from PIL import Image
 # --- Global Model Loading & LoRA Handling ---
 MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
@@ -71,6 +72,9 @@ SLIDER_MAX_H = 896
 SLIDER_MIN_W = 128
 SLIDER_MAX_W = 896
 def _calculate_new_dimensions_wan(pil_image: Image.Image, mod_val: int, calculation_max_area: float,
                                  min_slider_h: int, max_slider_h: int,
                                  min_slider_w: int, max_slider_w: int,
@@ -130,16 +134,17 @@ def handle_image_upload_for_dims_wan(uploaded_pil_image: Image.Image | None, cur
 # --- Gradio Interface Function ---
 @spaces.GPU
 def generate_video(input_image: Image.Image, prompt: str, negative_prompt: str,
-                   height: int, width: int, duration_seconds: float, # Changed from num_frames
                    guidance_scale: float, steps: int,
-                   progress=gr.Progress(track_tqdm=True)): # Removed fps_for_conditioning_and_export
     if input_image is None:
         raise gr.Error("Please upload an input image.")
     # Constants for frame calculation
     FIXED_FPS = 24
-    MIN_FRAMES_MODEL = 8  # Based on original num_frames_input slider min
-    MAX_FRAMES_MODEL = 81 # Based on original num_frames_input slider max
     logger.info("Starting video generation...")
     logger.info(f"  Input Image: Uploaded (Original size: {input_image.size if input_image else 'N/A'})")
@@ -149,24 +154,25 @@ def generate_video(input_image: Image.Image, prompt: str, negative_prompt: str,
     target_height = int(height)
     target_width = int(width)
-    # duration_seconds is already float
     guidance_scale_val = float(guidance_scale)
     steps_val = int(steps)
-    # Calculate number of frames based on duration and fixed FPS
     num_frames_for_pipeline = int(round(duration_seconds * FIXED_FPS))
-    # Clamp num_frames to be within model's supported range
     num_frames_for_pipeline = max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, num_frames_for_pipeline))
-    # Ensure at least MIN_FRAMES_MODEL if rounding leads to a very small number (or zero)
     if num_frames_for_pipeline < MIN_FRAMES_MODEL:
         num_frames_for_pipeline = MIN_FRAMES_MODEL
     logger.info(f"  Duration: {duration_seconds:.1f}s, Fixed FPS (conditioning & export): {FIXED_FPS}")
     logger.info(f"  Calculated Num Frames: {num_frames_for_pipeline} (clamped to [{MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL}])")
     logger.info(f"  Guidance Scale: {guidance_scale_val}, Steps: {steps_val}")
-    # Ensure dimensions are compatible.
     if target_height % MOD_VALUE_H != 0:
         logger.warning(f"Height {target_height} is not a multiple of {MOD_VALUE_H}. Adjusting...")
         target_height = (target_height // MOD_VALUE_H) * MOD_VALUE_H
@@ -188,16 +194,16 @@ def generate_video(input_image: Image.Image, prompt: str, negative_prompt: str,
             negative_prompt=negative_prompt,
             height=target_height,
             width=target_width,
-            num_frames=num_frames_for_pipeline, # Use calculated and clamped num_frames
             guidance_scale=guidance_scale_val,
             num_inference_steps=steps_val,
-            generator=torch.Generator(device="cuda").manual_seed(0)
         ).frames[0]
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
-    export_to_video(output_frames_list, video_path, fps=FIXED_FPS) # Use fixed FPS for export
     logger.info(f"Video successfully generated and saved to {video_path}")
     return video_path
@@ -222,6 +228,21 @@ with gr.Blocks() as demo:
                     value=default_negative_prompt,
                     lines=3
                 )
                 with gr.Row():
                     height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
                     width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
@@ -253,7 +274,9 @@ with gr.Blocks() as demo:
         width_input,
         duration_seconds_input,
         guidance_scale_input,
-        steps_slider
     ]
     generate_button.click(
@@ -264,8 +287,9 @@ with gr.Blocks() as demo:
     gr.Examples(
         examples=[
-            ["peng.png", "a penguin playfully dancing in the snow, Antarctica", default_negative_prompt, 896, 512, 2, 1.0, 4],
-            ["forg.jpg", "the frog jumps around", default_negative_prompt, 448, 832, 2, 1.0, 4],
         ],
         inputs=inputs_for_click_and_examples,
         outputs=video_output,

 import logging
 import numpy as np
 from PIL import Image
+import random # Added for random seed generation
 # --- Global Model Loading & LoRA Handling ---
 MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
 SLIDER_MIN_W = 128
 SLIDER_MAX_W = 896
+# --- Constant for Seed ---
+MAX_SEED = np.iinfo(np.int32).max
 def _calculate_new_dimensions_wan(pil_image: Image.Image, mod_val: int, calculation_max_area: float,
                                  min_slider_h: int, max_slider_h: int,
                                  min_slider_w: int, max_slider_w: int,
 # --- Gradio Interface Function ---
 @spaces.GPU
 def generate_video(input_image: Image.Image, prompt: str, negative_prompt: str,
+                   height: int, width: int, duration_seconds: float,
                    guidance_scale: float, steps: int,
+                   seed: int, randomize_seed: bool,
+                   progress=gr.Progress(track_tqdm=True)):
     if input_image is None:
         raise gr.Error("Please upload an input image.")
     # Constants for frame calculation
     FIXED_FPS = 24
+    MIN_FRAMES_MODEL = 8
+    MAX_FRAMES_MODEL = 81
     logger.info("Starting video generation...")
     logger.info(f"  Input Image: Uploaded (Original size: {input_image.size if input_image else 'N/A'})")
     target_height = int(height)
     target_width = int(width)
     guidance_scale_val = float(guidance_scale)
     steps_val = int(steps)
     num_frames_for_pipeline = int(round(duration_seconds * FIXED_FPS))
     num_frames_for_pipeline = max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, num_frames_for_pipeline))
     if num_frames_for_pipeline < MIN_FRAMES_MODEL:
         num_frames_for_pipeline = MIN_FRAMES_MODEL
     logger.info(f"  Duration: {duration_seconds:.1f}s, Fixed FPS (conditioning & export): {FIXED_FPS}")
     logger.info(f"  Calculated Num Frames: {num_frames_for_pipeline} (clamped to [{MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL}])")
     logger.info(f"  Guidance Scale: {guidance_scale_val}, Steps: {steps_val}")
+    # Seed logic
+    current_seed = int(seed)
+    if randomize_seed:
+        current_seed = random.randint(0, MAX_SEED)
+    logger.info(f"  Initial Seed: {seed}, Randomize: {randomize_seed}, Using Seed: {current_seed}")
     if target_height % MOD_VALUE_H != 0:
         logger.warning(f"Height {target_height} is not a multiple of {MOD_VALUE_H}. Adjusting...")
         target_height = (target_height // MOD_VALUE_H) * MOD_VALUE_H
             negative_prompt=negative_prompt,
             height=target_height,
             width=target_width,
+            num_frames=num_frames_for_pipeline,
             guidance_scale=guidance_scale_val,
             num_inference_steps=steps_val,
+            generator=torch.Generator(device="cuda").manual_seed(current_seed) # Use current_seed
         ).frames[0]
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
+    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
     logger.info(f"Video successfully generated and saved to {video_path}")
     return video_path
                     value=default_negative_prompt,
                     lines=3
                 )
+                # --- Added Seed Controls ---
+                seed_input = gr.Slider(
+                    label="Seed",
+                    minimum=0,
+                    maximum=MAX_SEED,
+                    step=1,
+                    value=42, # Default seed value
+                    interactive=True
+                )
+                randomize_seed_checkbox = gr.Checkbox(
+                    label="Randomize seed",
+                    value=True, # Default to randomize
+                    interactive=True
+                )
+                # --- End of Added Seed Controls ---
                 with gr.Row():
                     height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
                     width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
         width_input,
         duration_seconds_input,
         guidance_scale_input,
+        steps_slider,
+        seed_input, # Added seed_input
+        randomize_seed_checkbox # Added randomize_seed_checkbox
     ]
     generate_button.click(
     gr.Examples(
         examples=[
+            # Added seed (e.g., 42) and randomize_seed (False, so the example seeds are actually used) to examples
+            ["peng.png", "a penguin playfully dancing in the snow, Antarctica", default_negative_prompt, 896, 512, 2, 1.0, 4, 42, False],
+            ["forg.jpg", "the frog jumps around", default_negative_prompt, 448, 832, 2, 1.0, 4, 123, False],
         ],
         inputs=inputs_for_click_and_examples,
         outputs=video_output,