File size: 1,784 Bytes

b0dfa4c

"""
Adapted from https://huggingface.co/spaces/nvidia/ChronoEdit/blob/main/app.py
"""

from diffusers.modular_pipelines import ModularPipelineBlocks, WanModularPipeline
from diffusers.utils import load_image
from diffusers import UniPCMultistepScheduler
from modular_blocks import ChronoEditBlocks
import torch
from PIL import Image

# --- Pipeline assembly -------------------------------------------------------
# Build a modular Wan pipeline from the ChronoEdit blocks and the model repo.
repo_id = "nvidia/ChronoEdit-14B-Diffusers"
blocks = ChronoEditBlocks()
pipe = WanModularPipeline(blocks, repo_id)

# Per-component dtypes: everything in bfloat16 except the image encoder,
# which is requested in float32.
component_dtypes = {"default": torch.bfloat16, "image_encoder": torch.float32}
pipe.load_components(
    trust_remote_code=True,
    device_map="cuda",
    torch_dtype=component_dtypes,
)

# Replace the loaded scheduler with a UniPC multistep scheduler created from
# the existing scheduler's config, passing flow_shift=2.0.
scheduler_config = pipe.scheduler.config
pipe.scheduler = UniPCMultistepScheduler.from_config(scheduler_config, flow_shift=2.0)

# Load the LoRA weights shipped in the same repo and fuse them at scale 1.0.
lora_weight_name = "lora/chronoedit_distill_lora.safetensors"
pipe.load_lora_weights(repo_id, weight_name=lora_weight_name)
pipe.fuse_lora(lora_scale=1.0)

# --- Inputs ------------------------------------------------------------------
image = load_image("https://huggingface.co/spaces/nvidia/ChronoEdit/resolve/main/examples/3.png")
# Adjacent literals are concatenated by the parser into one prompt string.
prompt = (
    "Transform the image so that inside the floral teacup of steaming tea, "
    "a small, cute mouse is sitting and taking a bath; "
    "the mouse should look relaxed and cheerful, "
    "with a tiny white bath towel draped over its head as if enjoying a spa moment, "
    "while the steam rises gently around it, "
    "blending seamlessly with the warm and cozy atmosphere."
)

# --- Inference ---------------------------------------------------------------
# image is resized within the pipeline unlike https://huggingface.co/spaces/nvidia/ChronoEdit/blob/main/app.py#L151
# refer to `ChronoEditImageInputStep`.
call_kwargs = {
    "image": image,
    "prompt": prompt,  # todo: enhance prompt
    "num_inference_steps": 8,  # todo: implement temporal reasoning
    "num_frames": 5,  # https://huggingface.co/spaces/nvidia/ChronoEdit/blob/main/app.py#L152
    "output_type": "np",
}
out = pipe(**call_kwargs)

# --- Save result -------------------------------------------------------------
# Take the first entry under "videos" and keep only its last frame.
frames = out.values["videos"][0]
last_frame = frames[-1]
# Scale to 0-255, clamp, and cast to uint8 before handing the array to PIL
# (presumably the "np" output is float in [0, 1] -- TODO confirm).
pixels = (last_frame * 255).clip(0, 255).astype("uint8")
Image.fromarray(pixels).save("demo.png")