diffusers-internal-dev
/

chronoedit-modular

modular_diffusers

Model card Files Files and versions

chronoedit-modular / inputs.py

sayakpaul's picture

sayakpaul HF Staff

Upload folder using huggingface_hub

fd6ab1f verified 16 days ago

history blame contribute delete

3.51 kB

	# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	# SPDX-License-Identifier: Apache-2.0
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from typing import List, Tuple

	import numpy as np
	import PIL
	import PIL.Image
	import torch
	from diffusers.modular_pipelines import ModularPipelineBlocks, InputParam, OutputParam, ModularPipeline, PipelineState
	from diffusers.modular_pipelines.wan.before_denoise import WanInputStep


	def calculate_dimensions(image, mod_value, target_area=720 * 1280):
	    """
	    Compute an output size of roughly ``target_area`` pixels that keeps the image's aspect ratio.

	    Each side is rounded to the nearest integer, floored to a multiple of
	    ``mod_value`` and finally clamped to at least ``mod_value``, so that a very
	    elongated image can never yield a zero-sized side.

	    Args:
	        image: Object exposing ``width`` and ``height`` attributes (e.g. a PIL Image).
	        mod_value: Alignment value; both returned sides are multiples of it.
	        target_area: Desired pixel area of the output. Defaults to ``720 * 1280``.

	    Returns:
	        Tuple of (width, height).

	    Raises:
	        ZeroDivisionError: If ``mod_value`` is 0, or if the image reports an
	            integer width or height of 0.
	    """
	    # Ratio is height / width, so a portrait image has a ratio above 1.
	    aspect_ratio = image.height / image.width

	    def _aligned(side):
	        # Round to the nearest pixel, floor onto the alignment grid, and never
	        # go below a single grid cell (a 0-sized side cannot be resized to).
	        return max(mod_value, int(round(side)) // mod_value * mod_value)

	    calculated_height = _aligned(np.sqrt(target_area * aspect_ratio))
	    calculated_width = _aligned(np.sqrt(target_area / aspect_ratio))

	    return calculated_width, calculated_height


	# Make the input step aware of `negative_prompt_embeds`.
	# ChronoEdit uses a `guidance_scale` of 1.
	class ChronoEditInputStep(WanInputStep):
	    """Variant of ``WanInputStep`` whose declared inputs include ``negative_prompt_embeds``.

	    Only the ``inputs`` declaration is overridden here; all other behavior is
	    inherited from ``WanInputStep``.
	    """

	    model_name = "chronoedit"

	    @property
	    def inputs(self) -> List[InputParam]:
	        """Declare the inputs of this step.

	        Returns:
	            ``num_videos_per_prompt`` (default 1), the required ``prompt_embeds``
	            and ``negative_prompt_embeds`` (not marked as required).
	        """
	        videos_per_prompt = InputParam("num_videos_per_prompt", default=1)
	        positive_embeds = InputParam(
	            "prompt_embeds",
	            required=True,
	            type_hint=torch.Tensor,
	            description="Pre-generated text embeddings. Can be generated from text_encoder step.",
	        )
	        negative_embeds = InputParam(
	            "negative_prompt_embeds",
	            type_hint=torch.Tensor,
	            description="Pre-generated negative text embeddings. Can be generated from text_encoder step.",
	        )
	        return [videos_per_prompt, positive_embeds, negative_embeds]


	class ChronoEditImageInputStep(ModularPipelineBlocks):
	    """Pipeline block that resizes the input image and records the resulting height and width.

	    The target size is obtained from ``calculate_dimensions`` with a modulo value of
	    ``components.vae_scale_factor_spatial * components.transformer.config.patch_size[1]``.
	    """

	    model_name = "chronoedit"

	    @property
	    def inputs(self) -> List[InputParam]:
	        """Declare the single input of this block: the image to resize."""
	        return [
	            InputParam(
	                name="image",
	                type_hint=PIL.Image.Image,
	                description="The input image; it is resized to the computed width and height.",
	            )
	        ]

	    @property
	    def intermediate_outputs(self) -> List[OutputParam]:
	        """Declare what this block writes to the state: the resized image and its size."""
	        return [
	            OutputParam(
	                name="image",
	                type_hint=PIL.Image.Image,
	                description="The input image resized to the computed width and height",
	            ),
	            OutputParam(name="height", type_hint=int, description="The height set w.r.t input image and specs"),
	            OutputParam(name="width", type_hint=int, description="The width set w.r.t input image and specs"),
	        ]

	    def __call__(self, components: ModularPipeline, state: PipelineState) -> Tuple[ModularPipeline, PipelineState]:
	        """Resize ``image`` in the block state and store the chosen ``height`` and ``width``.

	        Args:
	            components: Pipeline components; ``vae_scale_factor_spatial`` and
	                ``transformer.config.patch_size`` are read from it.
	            state: Pipeline state holding the ``image`` input.

	        Returns:
	            The ``(components, state)`` pair, with ``state`` updated in place.

	        Raises:
	            ValueError: If no ``image`` was provided.
	        """
	        block_state = self.get_block_state(state)
	        image = block_state.image
	        if image is None:
	            # Fail with an actionable message instead of an AttributeError on None further down.
	            raise ValueError("`image` must be provided to ChronoEditImageInputStep, but got None.")

	        # calculate_dimensions floors both sides to a multiple of this value.
	        mod_value = components.vae_scale_factor_spatial * components.transformer.config.patch_size[1]

	        width, height = calculate_dimensions(image, mod_value)
	        block_state.image = image.resize((width, height))
	        block_state.height = height
	        block_state.width = width

	        self.set_block_state(state, block_state)
	        return components, state