UDiffText

Build error

App Files Files Community

ZYMPKU committed on Dec 16, 2023

Commit

8841787

1 Parent(s): 5418bad

examples

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +4 -4
demo/examples/{Peaceful_0_0.jpeg → CREEK_0.jpg} +2 -2
demo/examples/{FAVOURITE_0_0.jpeg → Delivery_0.jpg} +2 -2
demo/examples/{FRONTIER_0_0.png → HAPPEN_0.jpg} +2 -2
demo/examples/{TREE_0_0.png → WORDS_0.jpg} +2 -2
demo/examples/{better_0_0.jpg → better_0.jpg} +0 -0
sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc +0 -0
sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc +0 -0
sgm/modules/diffusionmodules/guiders.py +2 -1
sgm/modules/diffusionmodules/sampling.py +8 -43
temp/attn_map/attn_map_1.png +0 -0
temp/attn_map/attn_map_10.png +0 -0
temp/attn_map/attn_map_11.png +0 -0
temp/attn_map/attn_map_12.png +0 -0
temp/attn_map/attn_map_13.png +0 -0
temp/attn_map/attn_map_14.png +0 -0
temp/attn_map/attn_map_15.png +0 -0
temp/attn_map/attn_map_16.png +0 -0
temp/attn_map/attn_map_17.png +0 -0
temp/attn_map/attn_map_18.png +0 -0
temp/attn_map/attn_map_19.png +0 -0
temp/attn_map/attn_map_2.png +0 -0
temp/attn_map/attn_map_20.png +0 -0
temp/attn_map/attn_map_21.png +0 -0
temp/attn_map/attn_map_22.png +0 -0
temp/attn_map/attn_map_23.png +0 -0
temp/attn_map/attn_map_24.png +0 -0
temp/attn_map/attn_map_25.png +0 -0
temp/attn_map/attn_map_26.png +0 -0
temp/attn_map/attn_map_27.png +0 -0
temp/attn_map/attn_map_28.png +0 -0
temp/attn_map/attn_map_29.png +0 -0
temp/attn_map/attn_map_3.png +0 -0
temp/attn_map/attn_map_4.png +0 -0
temp/attn_map/attn_map_5.png +0 -0
temp/attn_map/attn_map_6.png +0 -0
temp/attn_map/attn_map_7.png +0 -0
temp/attn_map/attn_map_8.png +0 -0
temp/attn_map/attn_map_9.png +0 -0
temp/seg_map/seg_1.npy +3 -0
temp/seg_map/seg_1.png +0 -0
temp/seg_map/seg_10.npy +3 -0
temp/seg_map/seg_11.npy +3 -0
temp/seg_map/seg_12.npy +3 -0
temp/seg_map/seg_13.npy +3 -0
temp/seg_map/seg_14.npy +3 -0
temp/seg_map/seg_15.npy +3 -0
temp/seg_map/seg_16.npy +3 -0
temp/seg_map/seg_17.npy +3 -0
temp/seg_map/seg_18.npy +3 -0

app.py CHANGED Viewed

@@ -171,7 +171,7 @@ if __name__ == "__main__":
     model = init_model(cfgs)
     sampler = init_sampling(cfgs)
     global_index = 0
-    resize = Resize((cfgs.H, cfgs.W))
     block = gr.Blocks().queue()
     with block:
@@ -202,7 +202,7 @@ if __name__ == "__main__":
             with gr.Column():
                 input_blk = gr.Image(source='upload', tool='sketch', type="numpy", label="Input", height=512)
-                gr.Markdown("Notice: please draw horizontally to indicate only **one** masked area.")
                 text = gr.Textbox(label="Text to render: (1~12 characters)", info="the text you want to render at the masked region")
                 run_button = gr.Button(variant="primary")
@@ -210,9 +210,9 @@ if __name__ == "__main__":
                     num_samples = gr.Slider(label="Images", info="number of generated images, locked as 1", minimum=1, maximum=1, value=1, step=1)
                     steps = gr.Slider(label="Steps", info ="denoising sampling steps", minimum=1, maximum=200, value=50, step=1)
-                    scale = gr.Slider(label="Guidance Scale", info="the scale of classifier-free guidance (CFG)", minimum=0.0, maximum=10.0, value=5.0, step=0.1)
                     seed = gr.Slider(label="Seed", info="random seed for noise initialization", minimum=0, maximum=2147483647, step=1, randomize=True)
-                    show_detail = gr.Checkbox(label="Show Detail", info="show the additional visualization results", value=False)
             with gr.Column():

     model = init_model(cfgs)
     sampler = init_sampling(cfgs)
     global_index = 0
+    resize = Resize((cfgs.H, cfgs.W), antialias=True)
     block = gr.Blocks().queue()
     with block:
             with gr.Column():
                 input_blk = gr.Image(source='upload', tool='sketch', type="numpy", label="Input", height=512)
+                gr.Markdown("Notice: please draw horizontally to indicate only **one** masked area. The image may be cropped automatically into a proper scale.")
                 text = gr.Textbox(label="Text to render: (1~12 characters)", info="the text you want to render at the masked region")
                 run_button = gr.Button(variant="primary")
                     num_samples = gr.Slider(label="Images", info="number of generated images, locked as 1", minimum=1, maximum=1, value=1, step=1)
                     steps = gr.Slider(label="Steps", info ="denoising sampling steps", minimum=1, maximum=200, value=50, step=1)
+                    scale = gr.Slider(label="Guidance Scale", info="the scale of classifier-free guidance (CFG)", minimum=0.0, maximum=10.0, value=4.0, step=0.1)
                     seed = gr.Slider(label="Seed", info="random seed for noise initialization", minimum=0, maximum=2147483647, step=1, randomize=True)
+                    show_detail = gr.Checkbox(label="Show Detail", info="show the additional visualization results", value=True)
             with gr.Column():

demo/examples/{Peaceful_0_0.jpeg → CREEK_0.jpg} RENAMED Viewed

File without changes

demo/examples/{FAVOURITE_0_0.jpeg → Delivery_0.jpg} RENAMED Viewed

File without changes

demo/examples/{FRONTIER_0_0.png → HAPPEN_0.jpg} RENAMED Viewed

File without changes

demo/examples/{TREE_0_0.png → WORDS_0.jpg} RENAMED Viewed

File without changes

demo/examples/{better_0_0.jpg → better_0.jpg} RENAMED Viewed

File without changes

sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc CHANGED Viewed

Binary files a/sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc and b/sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc differ

sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc CHANGED Viewed

Binary files a/sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc and b/sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc differ

sgm/modules/diffusionmodules/guiders.py CHANGED Viewed

@@ -13,6 +13,7 @@ class VanillaCFG:
     def __init__(self, scale, dyn_thresh_config=None):
         scale_schedule = lambda scale, sigma: scale  # independent of step
         self.scale_schedule = partial(scale_schedule, scale)
         self.dyn_thresh = instantiate_from_config(
             default(
                 dyn_thresh_config,
@@ -24,7 +25,7 @@ class VanillaCFG:
     def __call__(self, x, sigma):
         x_u, x_c = x.chunk(2)
-        scale_value = self.scale_schedule(sigma)
         x_pred = self.dyn_thresh(x_u, x_c, scale_value)
         return x_pred

     def __init__(self, scale, dyn_thresh_config=None):
         scale_schedule = lambda scale, sigma: scale  # independent of step
         self.scale_schedule = partial(scale_schedule, scale)
+        self.scale_value = scale
         self.dyn_thresh = instantiate_from_config(
             default(
                 dyn_thresh_config,
     def __call__(self, x, sigma):
         x_u, x_c = x.chunk(2)
+        scale_value = self.scale_value
         x_pred = self.dyn_thresh(x_u, x_c, scale_value)
         return x_pred

sgm/modules/diffusionmodules/sampling.py CHANGED Viewed

@@ -7,7 +7,6 @@ from typing import Dict, Union
 import imageio
 import torch
-import json
 import numpy as np
 import torch.nn.functional as F
 from omegaconf import ListConfig, OmegaConf
@@ -252,47 +251,15 @@ class EulerEDMSampler(EDMSampler):
         return x
-    def create_pascal_label_colormap(self):
-        """
-        PASCAL VOC 分割数据集的类别标签颜色映射label colormap
-        返回:
-            可视化分割结果的颜色映射Colormap
-        """
-        colormap = np.zeros((256, 3), dtype=int)
-        ind = np.arange(256, dtype=int)
-        for shift in reversed(range(8)):
-            for channel in range(3):
-                colormap[:, channel] |= ((ind >> channel) & 1) << shift
-            ind >>= 3
-        return colormap
-    def save_segment_map(self, image, attn_maps, tokens=None, save_name=None):
-        colormap = self.create_pascal_label_colormap()
-        H, W = image.shape[-2:]
-        image_ = image*0.3
         sections = []
         for i in range(len(tokens)):
             attn_map = attn_maps[i]
-            attn_map_t = np.tile(attn_map[None], (1,3,1,1)) # b, 3, h, w
-            attn_map_t = torch.from_numpy(attn_map_t)
-            attn_map_t = F.interpolate(attn_map_t, (W, H))
-            color = torch.from_numpy(colormap[i+1][None,:,None,None] / 255.0)
-            colored_attn_map = attn_map_t * color
-            colored_attn_map = colored_attn_map.to(device=image_.device)
-            image_ += colored_attn_map*0.7
             sections.append(attn_map)
         section = np.stack(sections)
-        np.save(f"temp/seg_map/seg_{save_name}.npy", section)
-        save_image(image_, f"temp/seg_map/seg_{save_name}.png", normalize=True)
     def get_init_noise(self, cfgs, model, cond, batch, uc=None):
@@ -376,8 +343,7 @@ class EulerEDMSampler(EDMSampler):
             local_loss = torch.zeros(1)
         if save_attn:
             attn_map = model.model.diffusion_model.save_attn_map(save_name=name, tokens=batch["label"][0])
-            denoised_decode = model.decode_first_stage(denoised) if denoised_decode is None else denoised_decode
-            self.save_segment_map(denoised_decode, attn_map, tokens=batch["label"][0], save_name=name)
         d = to_d(x, sigma_hat, denoised)
         dt = append_dims(next_sigma - sigma_hat, x.ndim)
@@ -410,7 +376,7 @@ class EulerEDMSampler(EDMSampler):
             alpha = 20 * np.sqrt(scales[i])
             update = aae_enabled
-            save_loss = detailed
             save_attn = detailed and (i == (num_sigmas-1)//2)
             save_inter = aae_enabled
@@ -452,7 +418,7 @@ class EulerEDMSampler(EDMSampler):
             imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.02)
         return x
 class EulerEDMDualSampler(EulerEDMSampler):
@@ -557,9 +523,8 @@ class EulerEDMDualSampler(EulerEDMSampler):
         else:
             local_loss = torch.zeros(1)
         if save_attn:
-            attn_map = model.model.diffusion_model.save_attn_map(save_name=name, save_single=True)
-            denoised_decode = model.decode_first_stage(denoised) if denoised_decode is None else denoised_decode
-            self.save_segment_map(denoised_decode, attn_map, tokens=batch["label"][0], save_name=name)
         d = to_d(x, sigma_hat, denoised)
         dt = append_dims(next_sigma - sigma_hat, x.ndim)
@@ -632,7 +597,7 @@ class EulerEDMDualSampler(EulerEDMSampler):
         print(f"Local losses: {local_losses}")
         if len(inters) > 0:
-            imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.1)
         return x

 import imageio
 import torch
 import numpy as np
 import torch.nn.functional as F
 from omegaconf import ListConfig, OmegaConf
         return x
+    def save_segment_map(self, attn_maps, tokens=None, save_name=None):
         sections = []
         for i in range(len(tokens)):
             attn_map = attn_maps[i]
             sections.append(attn_map)
         section = np.stack(sections)
+        np.save(f"./temp/seg_map/seg_{save_name}.npy", section)
     def get_init_noise(self, cfgs, model, cond, batch, uc=None):
             local_loss = torch.zeros(1)
         if save_attn:
             attn_map = model.model.diffusion_model.save_attn_map(save_name=name, tokens=batch["label"][0])
+            self.save_segment_map(attn_map, tokens=batch["label"][0], save_name=name)
         d = to_d(x, sigma_hat, denoised)
         dt = append_dims(next_sigma - sigma_hat, x.ndim)
             alpha = 20 * np.sqrt(scales[i])
             update = aae_enabled
+            save_loss = aae_enabled
             save_attn = detailed and (i == (num_sigmas-1)//2)
             save_inter = aae_enabled
             imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.02)
         return x
 class EulerEDMDualSampler(EulerEDMSampler):
         else:
             local_loss = torch.zeros(1)
         if save_attn:
+            attn_map = model.model.diffusion_model.save_attn_map(save_name=name, tokens=batch["label"][0])
+            self.save_segment_map(attn_map, tokens=batch["label"][0], save_name=name)
         d = to_d(x, sigma_hat, denoised)
         dt = append_dims(next_sigma - sigma_hat, x.ndim)
         print(f"Local losses: {local_losses}")
         if len(inters) > 0:
+            imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.02)
         return x