from abc import abstractmethod
from typing import Callable, Dict, List, Optional, Tuple

import numpy as np
import torch

from shap_e.models.nn.camera import (
    DifferentiableCamera,
    DifferentiableProjectiveCamera,
    get_image_coords,
    projective_camera_frame,
)
from shap_e.models.nn.meta import MetaModule
from shap_e.util.collections import AttrDict


class Renderer(MetaModule):
    """
    A rendering abstraction that can render rays and views by calling the
    appropriate models. The models are instantiated outside but registered in
    this module.
    """

    @abstractmethod
    def render_views(
        self,
        batch: AttrDict,
        params: Optional[Dict] = None,
        options: Optional[Dict] = None,
    ) -> AttrDict:
        """
        Returns a backproppable rendering of a view.

        :param batch: contains
            - height: Optional[int]
            - width: Optional[int]
            - inner_batch_size or ray_batch_size: Optional[int], defaults to 4096 rays

            and additionally, to specify poses with a default up direction:
            - poses: [batch_size x *shape x 2 x 3] where poses[:, ..., 0, :] are the camera
              positions, and poses[:, ..., 1, :] are the z-axes (toward the object) of
              the camera frames.
            - camera: DifferentiableCamera. Assumes the same camera intrinsics
              across the batch for simplicity. Could eventually support
              batched cameras.

            or, to specify a batch of arbitrary poses:
            - cameras: DifferentiableCameraBatch of shape [batch_size x *shape].
        :param params: meta parameters
        :param options: Optional[Dict]
        """


class RayRenderer(Renderer):
    """
    A rendering abstraction that can render rays and views by calling the
    appropriate models. The models are instantiated outside but registered in
    this module.
    """

    @abstractmethod
    def render_rays(
        self,
        batch: AttrDict,
        params: Optional[Dict] = None,
        options: Optional[Dict] = None,
    ) -> AttrDict:
        """
        :param batch: has
            - rays: [batch_size x ... x 2 x 3] specifying the origin and direction of each ray.
            - radii (optional): [batch_size x ... x 1] the "thickness" of each ray.
        :param options: Optional[Dict]
        """

    def render_views(
        self,
        batch: AttrDict,
        params: Optional[Dict] = None,
        options: Optional[Dict] = None,
    ) -> AttrDict:
        output = render_views_from_rays(
            self.render_rays,
            batch,
            params=params,
            options=options,
            device=self.device,
        )
        return output

    def forward(
        self,
        batch: AttrDict,
        params: Optional[Dict] = None,
        options: Optional[Dict] = None,
    ) -> AttrDict:
        """
        :param batch: must contain either
            - rays: [batch_size x ... x 2 x 3] specifying the origin and direction of each ray.

            or
            - poses: [batch_size x 2 x 3] where poses[:, 0] are the camera
              positions, and poses[:, 1] are the z-axes (toward the object) of
              the camera frames.
            - camera: an instance of Camera that implements camera_rays

            or
            - cameras: DifferentiableCameraBatch of shape [batch_size x *shape].

            For either of the two camera-based options, these may additionally
            be specified:
            - height: Optional[int]
            - width: Optional[int]
            - ray_batch_size or inner_batch_size: Optional[int], defaults to 4096 rays
        :param params: a dictionary of optional meta parameters.
        :param options: a dictionary of other hyperparameters that may be
            related to rendering or debugging
        :return: a dictionary containing
            - channels: [batch_size, *shape, n_channels]
            - distances: [batch_size, *shape, 1]
            - transmittance: [batch_size, *shape, 1]
            - aux_losses: Dict[str, torch.Tensor]
        """
| if "rays" in batch: | |
| for key in ["poses", "camera", "height", "width"]: | |
| assert key not in batch | |
| return self.render_rays(batch, params=params, options=options) | |
| elif "poses" in batch or "cameras" in batch: | |
| assert "rays" not in batch | |
| if "poses" in batch: | |
| assert "camera" in batch | |
| else: | |
| assert "camera" not in batch | |
| return self.render_views(batch, params=params, options=options) | |
| raise NotImplementedError | |
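
    # Dispatch summary (illustrative, for a hypothetical concrete subclass
    # instance `renderer`):
    #
    #     renderer(AttrDict(rays=rays))                   # -> render_rays
    #     renderer(AttrDict(poses=poses, camera=camera))  # -> render_views
    #     renderer(AttrDict(cameras=camera_batch))        # -> render_views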


def get_camera_from_batch(batch: AttrDict) -> Tuple[DifferentiableCamera, int, Tuple[int]]:
    if "poses" in batch:
        assert "cameras" not in batch
        batch_size, *inner_shape, n_vecs, spatial_dim = batch.poses.shape
        assert n_vecs == 2 and spatial_dim == 3
        inner_batch_size = int(np.prod(inner_shape))
        poses = batch.poses.view(batch_size * inner_batch_size, 2, 3)
        position, direction = poses[:, 0], poses[:, 1]
        camera = projective_camera_frame(position, direction, batch.camera)
    elif "cameras" in batch:
        assert "camera" not in batch
        batch_size, *inner_shape = batch.cameras.shape
        camera = batch.cameras.flat_camera
    else:
        raise ValueError(f'neither "poses" nor "cameras" found in keys: {batch.keys()}')
    if "height" in batch and "width" in batch:
        camera = camera.resize_image(batch.width, batch.height)
    return camera, batch_size, inner_shape


def append_tensor(val_list: Optional[List[torch.Tensor]], output: Optional[torch.Tensor]):
    if val_list is None:
        return [output]
    return val_list + [output]
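
# Usage note: append_tensor is the reduction passed to AttrDict.combine in
# render_views_from_rays below, folding each ray chunk's outputs into per-key
# lists, e.g. append_tensor(None, t) == [t] and append_tensor([t0], t1) == [t0, t1].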


def render_views_from_rays(
    render_rays: Callable[[AttrDict, AttrDict, AttrDict], AttrDict],
    batch: AttrDict,
    params: Optional[Dict] = None,
    options: Optional[Dict] = None,
    device: torch.device = torch.device("cuda"),
) -> AttrDict:
    camera, batch_size, inner_shape = get_camera_from_batch(batch)
    inner_batch_size = int(np.prod(inner_shape))

    coords = get_image_coords(camera.width, camera.height).to(device)
    coords = torch.broadcast_to(coords.unsqueeze(0), [batch_size * inner_batch_size, *coords.shape])
    rays = camera.camera_rays(coords)

    # mip-NeRF radii calculation from:
    # https://github.com/google/mipnerf/blob/84c969e0a623edd183b75693aed72a7e7c22902d/internal/datasets.py#L193-L200
    directions = rays.view(batch_size, inner_batch_size, camera.height, camera.width, 2, 3)[
        ..., 1, :
    ]
    neighbor_dists = torch.linalg.norm(directions[:, :, :, 1:] - directions[:, :, :, :-1], dim=-1)
    neighbor_dists = torch.cat([neighbor_dists, neighbor_dists[:, :, :, -2:-1]], dim=3)
    radii = (neighbor_dists * 2 / np.sqrt(12)).view(batch_size, -1, 1)
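    # The radius for each pixel is the distance between its ray direction and
    # its horizontal neighbor's, scaled by 2/sqrt(12); per mip-NeRF, this
    # chooses a cone radius between the circles inscribed in and circumscribed
    # about the pixel footprint.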

    rays = rays.view(batch_size, inner_batch_size * camera.height * camera.width, 2, 3)

    if isinstance(camera, DifferentiableProjectiveCamera):
        # Compute the camera z direction corresponding to every ray's pixel.
        # Used for depth computations below.
        z_directions = (
            (camera.z / torch.linalg.norm(camera.z, dim=-1, keepdim=True))
            .reshape([batch_size, inner_batch_size, 1, 3])
            .repeat(1, 1, camera.width * camera.height, 1)
            .reshape(batch_size, inner_batch_size * camera.height * camera.width, 3)
        )

    ray_batch_size = batch.get("ray_batch_size", batch.get("inner_batch_size", 4096))
    assert rays.shape[1] % ray_batch_size == 0
    n_batches = rays.shape[1] // ray_batch_size

    output_list = AttrDict(aux_losses=dict())

    for idx in range(n_batches):
        rays_batch = AttrDict(
            rays=rays[:, idx * ray_batch_size : (idx + 1) * ray_batch_size],
            radii=radii[:, idx * ray_batch_size : (idx + 1) * ray_batch_size],
        )
        output = render_rays(rays_batch, params=params, options=options)

        if isinstance(camera, DifferentiableProjectiveCamera):
            # Project each ray's rendered distance onto the camera z-axis to get depth.
            z_batch = z_directions[:, idx * ray_batch_size : (idx + 1) * ray_batch_size]
            ray_directions = rays_batch.rays[:, :, 1]
            z_dots = (ray_directions * z_batch).sum(-1, keepdim=True)
            output.depth = output.distances * z_dots

        output_list = output_list.combine(output, append_tensor)

    def _resize(val_list: List[torch.Tensor]):
        val = torch.cat(val_list, dim=1)
        assert val.shape[1] == inner_batch_size * camera.height * camera.width
        return val.view(batch_size, *inner_shape, camera.height, camera.width, -1)

    def _avg(_key: str, loss_list: List[torch.Tensor]):
        return sum(loss_list) / n_batches

    output = AttrDict(
        {name: _resize(val_list) for name, val_list in output_list.items() if name != "aux_losses"}
    )
    output.aux_losses = output_list.aux_losses.map(_avg)

    return output
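

# A minimal smoke test, offered as an illustration rather than part of the
# module. It drives render_views_from_rays through the poses path with a
# constant-color render_rays stub. The DifferentiableProjectiveCamera
# construction below assumes its dataclass fields are origin/x/y/z frame
# vectors plus width/height/x_fov/y_fov; adjust if the camera API differs.
if __name__ == "__main__":

    def _constant_render_rays(rays_batch, params=None, options=None):
        # Return flat white everywhere, at unit distance, fully transparent-free.
        batch_shape = rays_batch.rays.shape[:2]
        return AttrDict(
            channels=torch.ones(*batch_shape, 3),
            distances=torch.ones(*batch_shape, 1),
            transmittance=torch.zeros(*batch_shape, 1),
            aux_losses=AttrDict(),
        )

    ref_camera = DifferentiableProjectiveCamera(
        origin=torch.tensor([[0.0, 0.0, -4.0]]),
        x=torch.tensor([[1.0, 0.0, 0.0]]),
        y=torch.tensor([[0.0, 1.0, 0.0]]),
        z=torch.tensor([[0.0, 0.0, 1.0]]),
        width=64,
        height=64,
        x_fov=0.7,
        y_fov=0.7,
    )
    # One scene, one view: the camera sits on -z looking toward the origin.
    batch = AttrDict(
        poses=torch.tensor([[[[0.0, 0.0, -4.0], [0.0, 0.0, 1.0]]]]),  # [1 x 1 x 2 x 3]
        camera=ref_camera,
        ray_batch_size=4096,
    )
    views = render_views_from_rays(_constant_render_rays, batch, device=torch.device("cpu"))
    print({k: tuple(v.shape) for k, v in views.items() if k != "aux_losses"})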