Spaces:
Sleeping
Sleeping
from typing import List, Sequence, Tuple

import numpy as np
from PIL import Image
def im_resize(
    image: Image.Image,
    size: Tuple[int, int],
    resample: int = Image.Resampling.BICUBIC,
) -> Image.Image:
    """
    Resize an image by delegating to its ``resize`` method.

    Args:
        image: Image whose ``resize`` method is invoked.
        size: Target size, forwarded unchanged as the first positional
            argument of ``image.resize``.
        resample: Resampling filter forwarded as the ``resample`` keyword;
            bicubic unless the caller overrides it.

    Returns:
        Whatever ``image.resize`` returns for the given size and filter.
    """
    resized = image.resize(size, resample=resample)
    return resized
def normalize(
    image: np.ndarray,
    mean: Sequence[float] = (0.5, 0.5, 0.5),
    std: Sequence[float] = (0.5, 0.5, 0.5),
) -> np.ndarray:
    """
    Normalize an image array.

    Computes ``(image - mean) / std`` element-wise. ``mean`` and ``std`` are
    turned into arrays and combined with ``image`` under NumPy's broadcasting
    rules, so their length has to be compatible with the trailing axis of
    ``image``.

    Args:
        image: Array to normalize.
        mean: Values subtracted from ``image``. Any sequence of floats (list
            or tuple) is accepted.
        std: Values the mean-shifted array is divided by. Any sequence of
            floats (list or tuple) is accepted.

    Returns:
        A new array holding the normalized values; ``image`` itself is not
        modified.
    """
    # The defaults are tuples rather than lists so that no mutable object is
    # shared between calls through the function's default arguments.
    return (image - np.asarray(mean)) / np.asarray(std)
def create_patches(image: Image.Image, image_patch_size: int = 378) -> np.ndarray:
    """
    Convert an image into a stack of normalized patches.

    The intent is to split the image into a variable number of patches
    depending upon its resolution. That splitting is not implemented yet, so
    the result currently always holds two copies of the global patch (the
    whole image resized to ``image_patch_size`` x ``image_patch_size``).

    Args:
        image: Source image. Images whose mode is not ``"RGB"`` are converted
            to RGB first, because the normalization below uses three
            per-channel mean/std values and the transpose needs a trailing
            channel axis.
        image_patch_size: Side length, in pixels, of each square patch.

    Returns:
        A ``float16`` array with the patches stacked along axis 0 and each
        patch laid out channels-first, i.e. shape
        ``(2, 3, image_patch_size, image_patch_size)``.
    """
    # Grayscale, palette, RGBA or CMYK input would otherwise produce an array
    # with a missing or differently sized channel axis and fail below.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Start off with the global patch.
    global_patch = im_resize(image, (image_patch_size, image_patch_size))

    # TODO: Actually implement patching. The plan: when the longest image
    # side is at least 1.4x the patch size, pick the closest resolution
    # template among (1, 2), (2, 1) and (2, 2) by comparing
    # template[1] / template[0] against width / height, then cut the image
    # accordingly. Until then every image, small or large, just gets a
    # duplicate of the global patch. The duplicate costs a small amount of
    # redundant computation now, but it is simpler and future-proofs us
    # if/when we condition the vision encoder on the patch type.
    patches = [global_patch, global_patch]

    return np.stack(
        [
            # Scale pixel values by 1/255, normalize, then move channels first.
            normalize(
                np.array(patch_img) / 255.0,
                mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5],
            ).transpose(2, 0, 1)
            for patch_img in patches
        ],
        dtype=np.float16,
    )