"""
Adapted from https://huggingface.co/spaces/nvidia/ChronoEdit/blob/main/app.py
"""
import torch
from PIL import Image

from diffusers import UniPCMultistepScheduler
from diffusers.modular_pipelines import WanModularPipeline
from diffusers.utils import load_image

from modular_blocks import ChronoEditBlocks
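
# ChronoEdit as a modular diffusers pipeline: the custom blocks define the edit
# workflow, and the Hub repo below supplies the model components.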
repo_id = "nvidia/ChronoEdit-14B-Diffusers"
blocks = ChronoEditBlocks()
pipe = WanModularPipeline(blocks, repo_id)
pipe.load_components(
    trust_remote_code=True,
    device_map="cuda",
    torch_dtype={"default": torch.bfloat16, "image_encoder": torch.float32},
)
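
# Few-step setup: UniPC with flow_shift=2.0 plus the fused distillation LoRA
# pair with the num_inference_steps=8 used below.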
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=2.0)
pipe.load_lora_weights(repo_id, weight_name="lora/chronoedit_distill_lora.safetensors")
pipe.fuse_lora(lora_scale=1.0)
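
# Input image and edit instruction.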
image = load_image("https://huggingface.co/spaces/nvidia/ChronoEdit/resolve/main/examples/3.png")
prompt = "Transform the image so that inside the floral teacup of steaming tea, a small, cute mouse is sitting and taking a bath; the mouse should look relaxed and cheerful, with a tiny white bath towel draped over its head as if enjoying a spa moment, while the steam rises gently around it, blending seamlessly with the warm and cozy atmosphere."
# Unlike https://huggingface.co/spaces/nvidia/ChronoEdit/blob/main/app.py#L151,
# the image is resized within the pipeline; see `ChronoEditImageInputStep`.
out = pipe(
    image=image,
    prompt=prompt,  # todo: enhance prompt
    num_inference_steps=8,  # todo: implement temporal reasoning
    num_frames=5,  # https://huggingface.co/spaces/nvidia/ChronoEdit/blob/main/app.py#L152
    output_type="np",
)
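
# The output is a short video; its last frame is the edited image.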
frames = out.values["videos"][0]
Image.fromarray((frames[-1] * 255).clip(0, 255).astype("uint8")).save("demo.png")