import os

import clip
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F

from rewards.base_reward import BaseRewardLoss
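
# BaseRewardLoss (imported from this repo) is assumed to provide the interface
# used below: freeze_parameters() to disable gradients on the reward models,
# process_features() for embedding post-processing, and an __init__(name, weighting)
# that registers the loss name and its weight.
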
class AestheticLoss(BaseRewardLoss):
    """Aesthetic reward loss: scores CLIP ViT-L/14 image embeddings with a frozen MLP."""

    def __init__(
        self,
        weighting: float,
        dtype: torch.dtype,
        device: torch.device,
        cache_dir: str,
        memsave: bool = False,
    ):
        self.memsave = memsave
        self.clip_model, self.preprocess_fn = clip.load(
            "ViT-L/14", device=device, download_root=cache_dir
        )
        self.clip_model = self.clip_model.to(device, dtype=dtype)
        self.mlp = MLP(768).to(device, dtype=dtype)
        # Load the model you trained previously or the model available in this repo.
        state = torch.load(f"{os.getcwd()}/ckpts/aesthetic-model.pth")
        self.mlp.load_state_dict(state)
        self.clip_model.eval()
        if memsave:
            import memsave_torch.nn

            self.mlp = memsave_torch.nn.convert_to_memory_saving(self.mlp)
            self.clip_model = memsave_torch.nn.convert_to_memory_saving(
                self.clip_model
            ).to(device, dtype=dtype)
        # Reward models are frozen; only the generated image is optimized.
        self.freeze_parameters(self.clip_model.parameters())
        self.freeze_parameters(self.mlp.parameters())
        super().__init__("Aesthetic", weighting)
    def get_image_features(self, image: torch.Tensor) -> torch.Tensor:
        with torch.autocast("cuda"):
            clip_img_features = self.clip_model.encode_image(image)
        # L2-normalize the CLIP embedding, guarding against division by zero.
        l2 = torch.norm(clip_img_features, p=2, dim=-1, keepdim=True)
        l2 = torch.where(
            l2 == 0,
            torch.tensor(
                1.0, device=clip_img_features.device, dtype=clip_img_features.dtype
            ),
            l2,
        )
        clip_img_features = clip_img_features / l2
        return clip_img_features
    def get_text_features(self, prompt: str) -> torch.Tensor:
        # The aesthetic reward is prompt-free, so no text features are needed.
        return None

    def compute_loss(
        self, image_features: torch.Tensor, text_features: torch.Tensor
    ) -> torch.Tensor:
        # Unused: the full loss is computed directly in __call__.
        return None
    def __call__(self, image: torch.Tensor, prompt: str) -> torch.Tensor:
        if self.memsave:
            image = image.to(torch.float32)
        image_features = self.get_image_features(image)
        image_features_normed = self.process_features(image_features.to(torch.float16))
        # Aesthetic scores lie roughly in [0, 10]; subtracting the predicted
        # score from 10 turns score maximization into a loss to minimize.
        aesthetic_loss = 10.0 - self.mlp(image_features_normed).mean()
        return aesthetic_loss
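
# The MLP below appears to match the LAION aesthetic predictor (score
# regression on CLIP ViT-L/14 embeddings), whose checkpoint is loaded above;
# see https://github.com/christophschuhmann/improved-aesthetic-predictor.
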
class MLP(pl.LightningModule):
    def __init__(self, input_size, xcol="emb", ycol="avg_rating"):
        super().__init__()
        self.input_size = input_size
        self.xcol = xcol
        self.ycol = ycol
        self.layers = nn.Sequential(
            nn.Linear(self.input_size, 1024),
            # nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024, 128),
            # nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            # nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, 16),
            # nn.ReLU(),
            nn.Linear(16, 1),
        )
    def forward(self, x):
        return self.layers(x)

    def training_step(self, batch, batch_idx):
        x = batch[self.xcol]
        y = batch[self.ycol].reshape(-1, 1)
        x_hat = self.layers(x)
        loss = F.mse_loss(x_hat, y)
        return loss

    def validation_step(self, batch, batch_idx):
        x = batch[self.xcol]
        y = batch[self.ycol].reshape(-1, 1)
        x_hat = self.layers(x)
        loss = F.mse_loss(x_hat, y)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
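

# A minimal usage sketch (not part of the module): construct the loss and score
# a batch of images. The cache directory and the random input batch are
# illustrative assumptions; a real run needs ckpts/aesthetic-model.pth in the
# working directory and CLIP-preprocessed images of shape (B, 3, 224, 224).
if __name__ == "__main__":
    device = torch.device("cuda")
    loss_fn = AestheticLoss(
        weighting=1.0,
        dtype=torch.float16,
        device=device,
        cache_dir="~/.cache/clip",  # hypothetical CLIP download location
    )
    images = torch.randn(4, 3, 224, 224, device=device, dtype=torch.float16)
    loss = loss_fn(images, prompt="")  # the prompt is unused by this reward
    print(loss.item())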