Update model_new.py

model_new.py  CHANGED  (+32 -40)
```diff
@@ -12,33 +12,13 @@ from condition.canny import CannyDetector
 import time
 from autoregressive.models.generate import generate
 from condition.midas.depth import MidasDetector
-
+from preprocessor import Preprocessor
 
 models = {
-    "
-    "depth": "checkpoints/
+    "edge": "checkpoints/edge_base.safetensors",
+    "depth": "checkpoints/depth_base.safetensors",
 }
-
-
-def resize_image_to_16_multiple(image, condition_type='canny'):
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    # image = Image.open(image_path)
-    width, height = image.size
-
-    if condition_type == 'depth': # The depth model requires a side length that is a multiple of 32
-        new_width = (width + 31) // 32 * 32
-        new_height = (height + 31) // 32 * 32
-    else:
-        new_width = (width + 15) // 16 * 16
-        new_height = (height + 15) // 16 * 16
-
-    resized_image = image.resize((new_width, new_height))
-    return resized_image
-
-
 class Model:
-
     def __init__(self):
         self.device = torch.device(
             "cuda")
```
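For context, the removed resize_image_to_16_multiple helper rounded each side up to the nearest multiple of 16 (32 for the depth model), whereas the new code resizes the condition image to a fixed 512x512 later in the diff. A minimal sketch of that ceiling-to-multiple arithmetic:

```python
# Ceiling-to-multiple rounding, as the removed resize_image_to_16_multiple helper did.
def round_up(side: int, multiple: int = 16) -> int:
    return (side + multiple - 1) // multiple * multiple

assert round_up(500, 16) == 512   # (500 + 15) // 16 * 16
assert round_up(500, 32) == 512   # the depth branch used multiples of 32
assert round_up(768, 16) == 768   # already a multiple: unchanged
```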
```diff
@@ -46,8 +26,9 @@ class Model:
         self.task_name = ""
         self.vq_model = self.load_vq()
         self.t5_model = self.load_t5()
-        self.
-
+        self.gpt_model_edge = self.load_gpt(condition_type='edge')
+        self.gpt_model_depth = self.load_gpt(condition_type='depth')
+        self.preprocessor = Preprocessor()
 
     def to(self, device):
         self.gpt_model_canny.to('cuda')
```
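The constructor now eagerly loads one GPT adapter per condition type, and each condition_type key resolves to a safetensors checkpoint through the models dict above. A minimal sketch of that lookup, assuming load_file used in load_gpt is safetensors.torch.load_file (the import is outside this diff):

```python
from safetensors.torch import load_file  # assumed source of load_file used by load_gpt

gpt_ckpt = models['edge']         # "checkpoints/edge_base.safetensors"
state_dict = load_file(gpt_ckpt)  # flat dict: parameter name -> tensor, loaded on CPU
```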
```diff
@@ -67,19 +48,17 @@
         gpt_ckpt = models[condition_type]
         # precision = torch.bfloat16
         precision = torch.float32
-        latent_size =
+        latent_size = 512 // 16
         gpt_model = GPT_models["GPT-XL"](
             block_size=latent_size**2,
             cls_token_num=120,
             model_type='t2i',
             condition_type=condition_type,
+            adapter_size='base',
         ).to(device='cpu', dtype=precision)
-
         model_weight = load_file(gpt_ckpt)
-        print("prev:", model_weight['adapter.model.embeddings.patch_embeddings.projection.weight'])
         gpt_model.load_state_dict(model_weight, strict=True)
         gpt_model.eval()
-        print("loaded:", gpt_model.adapter.model.embeddings.patch_embeddings.projection.weight)
         print("gpt model is loaded")
         return gpt_model
 
```
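The hard-coded latent_size appears to assume 512x512 inputs with a 16x downsampling tokenizer, so the image becomes a 32x32 grid of tokens and block_size is the number of image tokens in the GPT sequence:

```python
latent_size = 512 // 16       # 32 latent tokens per side
block_size = latent_size**2   # 1024 image tokens in total
```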
```diff
@@ -109,22 +88,35 @@
         seed: int,
         low_threshold: int,
         high_threshold: int,
+        control_strength: float,
+        preprocessor_name: str,
     ) -> list[PIL.Image.Image]:
-        print(image)
-        image = resize_image_to_16_multiple(image, 'canny')
-        W, H = image.size
-        print(W, H)
         self.t5_model.model.to('cuda').to(torch.bfloat16)
         self.gpt_model_canny.to('cuda').to(torch.bfloat16)
         self.vq_model.to('cuda')
-
-
-
-
-
-
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+        origin_W, origin_H = image.size
+        if preprocessor_name == 'Canny':
+            self.preprocessor.load("Canny")
+            condition_img = self.preprocessor(
+                image=image, low_threshold=low_threshold, high_threshold=high_threshold, detect_resolution=512)
+        elif preprocessor_name == 'Hed':
+            self.preprocessor.load("HED")
+            condition_img = self.preprocessor(
+                image=image, image_resolution=512, detect_resolution=512)
+        elif preprocessor_name == 'Lineart':
+            self.preprocessor.load("Lineart")
+            condition_img = self.preprocessor(
+                image=image, image_resolution=512, detect_resolution=512)
+        elif preprocessor_name == 'No preprocess':
+            condition_img = image
+        condition_img = condition_img.resize((512, 512))
+        W, H = condition_img.size
+
+        condition_img = torch.from_numpy(np.array(condition_img)).unsqueeze(0).permute(0, 3, 1, 2).repeat(2, 1, 1, 1)
         condition_img = condition_img.to(self.device)
-        condition_img = 2
+        condition_img = 2 * (condition_img / 255 - 0.5)
         prompts = [prompt] * 2
         caption_embs, emb_masks = self.t5_model.get_text_embeddings(prompts)
 
```
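The added preprocessing path always ends with a 512x512 RGB condition image, which is duplicated for the two prompts, moved to channels-first layout, and rescaled from [0, 255] to [-1, 1]. A minimal standalone sketch of that conversion (the blank image below is a stand-in for the preprocessor output):

```python
import numpy as np
import torch
from PIL import Image

condition_img = Image.new('RGB', (512, 512))                # stand-in for the preprocessor output
x = torch.from_numpy(np.array(condition_img))               # (512, 512, 3), uint8
x = x.unsqueeze(0).permute(0, 3, 1, 2).repeat(2, 1, 1, 1)   # (2, 3, 512, 512), one copy per prompt
x = 2 * (x / 255 - 0.5)                                     # float tensor with values in [-1, 1]
```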