Spaces:

ziyangmai
/

MotionInversion

Runtime error

App Files Files Community

MotionInversion / loss /base_loss.py

ziyangmai

page demo

113884e about 1 year ago

raw

history blame contribute delete

2.57 kB

	import torch
	import torch.nn.functional as F
	from utils.func_utils import tensor_to_vae_latent, sample_noise

	def BaseLoss(
	    train_loss_temporal,
	    accelerator,
	    optimizers,
	    lr_schedulers,
	    unet,
	    vae,
	    text_encoder,
	    noise_scheduler,
	    batch,
	    step,
	    config
	):
	    """Run one denoising training step and update the running loss.

	    Noise is added to the latents at randomly drawn timesteps, ``unet``
	    predicts the scheduler's target from the noisy latents, and the MSE
	    between prediction and target is back-propagated. Only the first
	    optimizer and the first LR scheduler are stepped.

	    NOTE(review): gradients are not zeroed here; presumably the caller
	    calls ``zero_grad`` around this function -- confirm.

	    Args:
	        train_loss_temporal: Running loss accumulator; the gathered mean
	            loss divided by ``config.train.gradient_accumulation_steps``
	            is added to it.
	        accelerator: Object providing ``gather`` and ``backward``
	            (presumably a Hugging Face ``Accelerator`` -- confirm).
	        optimizers: Indexable collection; only ``optimizers[0]`` is stepped.
	        lr_schedulers: Indexable collection; only ``lr_schedulers[0]`` is
	            stepped.
	        unet: Denoising model, called as
	            ``unet(noisy_latents, timesteps, encoder_hidden_states=...)``;
	            its ``.sample`` attribute is used as the prediction.
	        vae: Passed to ``tensor_to_vae_latent`` when latents are not cached.
	        text_encoder: Called on ``batch['prompt_ids']``; element ``[0]`` of
	            its output is used as conditioning. No gradient flows into it.
	        noise_scheduler: Provides ``add_noise``, ``get_velocity``,
	            ``config.num_train_timesteps`` and ``config.prediction_type``.
	        batch: Mapping with ``'prompt_ids'`` and either ``'latents'`` (when
	            ``config.train.cache_latents`` is truthy) or ``'pixel_values'``.
	        step: Unused; kept for interface compatibility.
	        config: Reads ``config.train.cache_latents``,
	            ``config.train.train_batch_size`` and
	            ``config.train.gradient_accumulation_steps``.

	    Returns:
	        Tuple ``(loss_temporal, train_loss_temporal)``: the loss tensor of
	        this step and the updated running accumulator.

	    Raises:
	        ValueError: If ``noise_scheduler.config.prediction_type`` is neither
	            ``"epsilon"`` nor ``"v_prediction"``.
	    """
	    # Use pre-computed latents when available, otherwise encode the pixels.
	    if config.train.cache_latents:
	        latents = batch["latents"]
	    else:
	        latents = tensor_to_vae_latent(batch["pixel_values"], vae)

	    # Sample the noise that will be added to the latents.
	    # NOTE(review): 0.1 / False are presumably the offset-noise strength and
	    # an "enable" flag -- confirm against utils.func_utils.sample_noise.
	    noise = sample_noise(latents, 0.1, False)
	    bsz = latents.shape[0]

	    # Sample a random timestep for each item in the batch.
	    timesteps = torch.randint(
	        0,
	        noise_scheduler.config.num_train_timesteps,
	        (bsz,),
	        device=latents.device,
	    ).long()

	    # Forward diffusion: add noise according to each timestep's magnitude.
	    noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

	    # Encode the prompt. The embedding is only ever used detached, so run
	    # the encoder under no_grad to avoid building (and then discarding) an
	    # autograd graph and its saved activations.
	    with torch.no_grad():
	        encoder_hidden_states = text_encoder(batch['prompt_ids'])[0]

	    # Pick the regression target that matches the scheduler's prediction type.
	    prediction_type = noise_scheduler.config.prediction_type
	    if prediction_type == "epsilon":
	        target = noise
	    elif prediction_type == "v_prediction":
	        target = noise_scheduler.get_velocity(latents, noise, timesteps)
	    else:
	        raise ValueError(f"Unknown prediction type {prediction_type}")

	    # Predict and compute the loss in float32.
	    model_pred = unet(
	        noisy_latents, timesteps, encoder_hidden_states=encoder_hidden_states
	    ).sample
	    loss_temporal = F.mse_loss(model_pred.float(), target.float(), reduction="mean")

	    # Average the loss across processes for logging; the scalar loss is
	    # repeated so every process contributes train_batch_size entries.
	    avg_loss_temporal = accelerator.gather(
	        loss_temporal.repeat(config.train.train_batch_size)
	    ).mean()
	    train_loss_temporal += (
	        avg_loss_temporal.item() / config.train.gradient_accumulation_steps
	    )

	    # Back-propagate and advance the first optimizer / scheduler only.
	    accelerator.backward(loss_temporal)
	    optimizers[0].step()
	    lr_schedulers[0].step()

	    return loss_temporal, train_loss_temporal