examples
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +4 -4
- demo/examples/{Peaceful_0_0.jpeg → CREEK_0.jpg} +2 -2
- demo/examples/{FAVOURITE_0_0.jpeg → Delivery_0.jpg} +2 -2
- demo/examples/{FRONTIER_0_0.png → HAPPEN_0.jpg} +2 -2
- demo/examples/{TREE_0_0.png → WORDS_0.jpg} +2 -2
- demo/examples/{better_0_0.jpg → better_0.jpg} +0 -0
- sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc +0 -0
- sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc +0 -0
- sgm/modules/diffusionmodules/guiders.py +2 -1
- sgm/modules/diffusionmodules/sampling.py +8 -43
- temp/attn_map/attn_map_1.png +0 -0
- temp/attn_map/attn_map_10.png +0 -0
- temp/attn_map/attn_map_11.png +0 -0
- temp/attn_map/attn_map_12.png +0 -0
- temp/attn_map/attn_map_13.png +0 -0
- temp/attn_map/attn_map_14.png +0 -0
- temp/attn_map/attn_map_15.png +0 -0
- temp/attn_map/attn_map_16.png +0 -0
- temp/attn_map/attn_map_17.png +0 -0
- temp/attn_map/attn_map_18.png +0 -0
- temp/attn_map/attn_map_19.png +0 -0
- temp/attn_map/attn_map_2.png +0 -0
- temp/attn_map/attn_map_20.png +0 -0
- temp/attn_map/attn_map_21.png +0 -0
- temp/attn_map/attn_map_22.png +0 -0
- temp/attn_map/attn_map_23.png +0 -0
- temp/attn_map/attn_map_24.png +0 -0
- temp/attn_map/attn_map_25.png +0 -0
- temp/attn_map/attn_map_26.png +0 -0
- temp/attn_map/attn_map_27.png +0 -0
- temp/attn_map/attn_map_28.png +0 -0
- temp/attn_map/attn_map_29.png +0 -0
- temp/attn_map/attn_map_3.png +0 -0
- temp/attn_map/attn_map_4.png +0 -0
- temp/attn_map/attn_map_5.png +0 -0
- temp/attn_map/attn_map_6.png +0 -0
- temp/attn_map/attn_map_7.png +0 -0
- temp/attn_map/attn_map_8.png +0 -0
- temp/attn_map/attn_map_9.png +0 -0
- temp/seg_map/seg_1.npy +3 -0
- temp/seg_map/seg_1.png +0 -0
- temp/seg_map/seg_10.npy +3 -0
- temp/seg_map/seg_11.npy +3 -0
- temp/seg_map/seg_12.npy +3 -0
- temp/seg_map/seg_13.npy +3 -0
- temp/seg_map/seg_14.npy +3 -0
- temp/seg_map/seg_15.npy +3 -0
- temp/seg_map/seg_16.npy +3 -0
- temp/seg_map/seg_17.npy +3 -0
- temp/seg_map/seg_18.npy +3 -0
app.py
CHANGED
|
@@ -171,7 +171,7 @@ if __name__ == "__main__":
|
|
| 171 |
model = init_model(cfgs)
|
| 172 |
sampler = init_sampling(cfgs)
|
| 173 |
global_index = 0
|
| 174 |
-
resize = Resize((cfgs.H, cfgs.W))
|
| 175 |
|
| 176 |
block = gr.Blocks().queue()
|
| 177 |
with block:
|
|
@@ -202,7 +202,7 @@ if __name__ == "__main__":
|
|
| 202 |
with gr.Column():
|
| 203 |
|
| 204 |
input_blk = gr.Image(source='upload', tool='sketch', type="numpy", label="Input", height=512)
|
| 205 |
-
gr.Markdown("Notice: please draw horizontally to indicate only **one** masked area.")
|
| 206 |
text = gr.Textbox(label="Text to render: (1~12 characters)", info="the text you want to render at the masked region")
|
| 207 |
run_button = gr.Button(variant="primary")
|
| 208 |
|
|
@@ -210,9 +210,9 @@ if __name__ == "__main__":
|
|
| 210 |
|
| 211 |
num_samples = gr.Slider(label="Images", info="number of generated images, locked as 1", minimum=1, maximum=1, value=1, step=1)
|
| 212 |
steps = gr.Slider(label="Steps", info ="denoising sampling steps", minimum=1, maximum=200, value=50, step=1)
|
| 213 |
-
scale = gr.Slider(label="Guidance Scale", info="the scale of classifier-free guidance (CFG)", minimum=0.0, maximum=10.0, value=
|
| 214 |
seed = gr.Slider(label="Seed", info="random seed for noise initialization", minimum=0, maximum=2147483647, step=1, randomize=True)
|
| 215 |
-
show_detail = gr.Checkbox(label="Show Detail", info="show the additional visualization results", value=
|
| 216 |
|
| 217 |
with gr.Column():
|
| 218 |
|
|
|
|
| 171 |
model = init_model(cfgs)
|
| 172 |
sampler = init_sampling(cfgs)
|
| 173 |
global_index = 0
|
| 174 |
+
resize = Resize((cfgs.H, cfgs.W), antialias=True)
|
| 175 |
|
| 176 |
block = gr.Blocks().queue()
|
| 177 |
with block:
|
|
|
|
| 202 |
with gr.Column():
|
| 203 |
|
| 204 |
input_blk = gr.Image(source='upload', tool='sketch', type="numpy", label="Input", height=512)
|
| 205 |
+
gr.Markdown("Notice: please draw horizontally to indicate only **one** masked area. The image may be cropped automatically into a proper scale.")
|
| 206 |
text = gr.Textbox(label="Text to render: (1~12 characters)", info="the text you want to render at the masked region")
|
| 207 |
run_button = gr.Button(variant="primary")
|
| 208 |
|
|
|
|
| 210 |
|
| 211 |
num_samples = gr.Slider(label="Images", info="number of generated images, locked as 1", minimum=1, maximum=1, value=1, step=1)
|
| 212 |
steps = gr.Slider(label="Steps", info ="denoising sampling steps", minimum=1, maximum=200, value=50, step=1)
|
| 213 |
+
scale = gr.Slider(label="Guidance Scale", info="the scale of classifier-free guidance (CFG)", minimum=0.0, maximum=10.0, value=4.0, step=0.1)
|
| 214 |
seed = gr.Slider(label="Seed", info="random seed for noise initialization", minimum=0, maximum=2147483647, step=1, randomize=True)
|
| 215 |
+
show_detail = gr.Checkbox(label="Show Detail", info="show the additional visualization results", value=True)
|
| 216 |
|
| 217 |
with gr.Column():
|
| 218 |
|
demo/examples/{Peaceful_0_0.jpeg → CREEK_0.jpg}
RENAMED
|
File without changes
|
demo/examples/{FAVOURITE_0_0.jpeg → Delivery_0.jpg}
RENAMED
|
File without changes
|
demo/examples/{FRONTIER_0_0.png → HAPPEN_0.jpg}
RENAMED
|
File without changes
|
demo/examples/{TREE_0_0.png → WORDS_0.jpg}
RENAMED
|
File without changes
|
demo/examples/{better_0_0.jpg → better_0.jpg}
RENAMED
|
File without changes
|
sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc
CHANGED
|
Binary files a/sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc and b/sgm/modules/diffusionmodules/__pycache__/guiders.cpython-311.pyc differ
|
|
|
sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc
CHANGED
|
Binary files a/sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc and b/sgm/modules/diffusionmodules/__pycache__/sampling.cpython-311.pyc differ
|
|
|
sgm/modules/diffusionmodules/guiders.py
CHANGED
|
@@ -13,6 +13,7 @@ class VanillaCFG:
|
|
| 13 |
def __init__(self, scale, dyn_thresh_config=None):
|
| 14 |
scale_schedule = lambda scale, sigma: scale # independent of step
|
| 15 |
self.scale_schedule = partial(scale_schedule, scale)
|
|
|
|
| 16 |
self.dyn_thresh = instantiate_from_config(
|
| 17 |
default(
|
| 18 |
dyn_thresh_config,
|
|
@@ -24,7 +25,7 @@ class VanillaCFG:
|
|
| 24 |
|
| 25 |
def __call__(self, x, sigma):
|
| 26 |
x_u, x_c = x.chunk(2)
|
| 27 |
-
scale_value = self.
|
| 28 |
x_pred = self.dyn_thresh(x_u, x_c, scale_value)
|
| 29 |
return x_pred
|
| 30 |
|
|
|
|
| 13 |
def __init__(self, scale, dyn_thresh_config=None):
|
| 14 |
scale_schedule = lambda scale, sigma: scale # independent of step
|
| 15 |
self.scale_schedule = partial(scale_schedule, scale)
|
| 16 |
+
self.scale_value = scale
|
| 17 |
self.dyn_thresh = instantiate_from_config(
|
| 18 |
default(
|
| 19 |
dyn_thresh_config,
|
|
|
|
| 25 |
|
| 26 |
def __call__(self, x, sigma):
|
| 27 |
x_u, x_c = x.chunk(2)
|
| 28 |
+
scale_value = self.scale_value
|
| 29 |
x_pred = self.dyn_thresh(x_u, x_c, scale_value)
|
| 30 |
return x_pred
|
| 31 |
|
sgm/modules/diffusionmodules/sampling.py
CHANGED
|
@@ -7,7 +7,6 @@ from typing import Dict, Union
|
|
| 7 |
|
| 8 |
import imageio
|
| 9 |
import torch
|
| 10 |
-
import json
|
| 11 |
import numpy as np
|
| 12 |
import torch.nn.functional as F
|
| 13 |
from omegaconf import ListConfig, OmegaConf
|
|
@@ -252,47 +251,15 @@ class EulerEDMSampler(EDMSampler):
|
|
| 252 |
|
| 253 |
return x
|
| 254 |
|
| 255 |
-
def
|
| 256 |
-
"""
|
| 257 |
-
PASCAL VOC 分割数据集的类别标签颜色映射label colormap
|
| 258 |
|
| 259 |
-
返回:
|
| 260 |
-
可视化分割结果的颜色映射Colormap
|
| 261 |
-
"""
|
| 262 |
-
colormap = np.zeros((256, 3), dtype=int)
|
| 263 |
-
ind = np.arange(256, dtype=int)
|
| 264 |
-
|
| 265 |
-
for shift in reversed(range(8)):
|
| 266 |
-
for channel in range(3):
|
| 267 |
-
colormap[:, channel] |= ((ind >> channel) & 1) << shift
|
| 268 |
-
ind >>= 3
|
| 269 |
-
|
| 270 |
-
return colormap
|
| 271 |
-
|
| 272 |
-
def save_segment_map(self, image, attn_maps, tokens=None, save_name=None):
|
| 273 |
-
|
| 274 |
-
colormap = self.create_pascal_label_colormap()
|
| 275 |
-
H, W = image.shape[-2:]
|
| 276 |
-
|
| 277 |
-
image_ = image*0.3
|
| 278 |
sections = []
|
| 279 |
for i in range(len(tokens)):
|
| 280 |
attn_map = attn_maps[i]
|
| 281 |
-
attn_map_t = np.tile(attn_map[None], (1,3,1,1)) # b, 3, h, w
|
| 282 |
-
attn_map_t = torch.from_numpy(attn_map_t)
|
| 283 |
-
attn_map_t = F.interpolate(attn_map_t, (W, H))
|
| 284 |
-
|
| 285 |
-
color = torch.from_numpy(colormap[i+1][None,:,None,None] / 255.0)
|
| 286 |
-
colored_attn_map = attn_map_t * color
|
| 287 |
-
colored_attn_map = colored_attn_map.to(device=image_.device)
|
| 288 |
-
|
| 289 |
-
image_ += colored_attn_map*0.7
|
| 290 |
sections.append(attn_map)
|
| 291 |
|
| 292 |
section = np.stack(sections)
|
| 293 |
-
np.save(f"temp/seg_map/seg_{save_name}.npy", section)
|
| 294 |
-
|
| 295 |
-
save_image(image_, f"temp/seg_map/seg_{save_name}.png", normalize=True)
|
| 296 |
|
| 297 |
def get_init_noise(self, cfgs, model, cond, batch, uc=None):
|
| 298 |
|
|
@@ -376,8 +343,7 @@ class EulerEDMSampler(EDMSampler):
|
|
| 376 |
local_loss = torch.zeros(1)
|
| 377 |
if save_attn:
|
| 378 |
attn_map = model.model.diffusion_model.save_attn_map(save_name=name, tokens=batch["label"][0])
|
| 379 |
-
|
| 380 |
-
self.save_segment_map(denoised_decode, attn_map, tokens=batch["label"][0], save_name=name)
|
| 381 |
|
| 382 |
d = to_d(x, sigma_hat, denoised)
|
| 383 |
dt = append_dims(next_sigma - sigma_hat, x.ndim)
|
|
@@ -410,7 +376,7 @@ class EulerEDMSampler(EDMSampler):
|
|
| 410 |
|
| 411 |
alpha = 20 * np.sqrt(scales[i])
|
| 412 |
update = aae_enabled
|
| 413 |
-
save_loss =
|
| 414 |
save_attn = detailed and (i == (num_sigmas-1)//2)
|
| 415 |
save_inter = aae_enabled
|
| 416 |
|
|
@@ -452,7 +418,7 @@ class EulerEDMSampler(EDMSampler):
|
|
| 452 |
imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.02)
|
| 453 |
|
| 454 |
return x
|
| 455 |
-
|
| 456 |
|
| 457 |
class EulerEDMDualSampler(EulerEDMSampler):
|
| 458 |
|
|
@@ -557,9 +523,8 @@ class EulerEDMDualSampler(EulerEDMSampler):
|
|
| 557 |
else:
|
| 558 |
local_loss = torch.zeros(1)
|
| 559 |
if save_attn:
|
| 560 |
-
attn_map = model.model.diffusion_model.save_attn_map(save_name=name,
|
| 561 |
-
|
| 562 |
-
self.save_segment_map(denoised_decode, attn_map, tokens=batch["label"][0], save_name=name)
|
| 563 |
|
| 564 |
d = to_d(x, sigma_hat, denoised)
|
| 565 |
dt = append_dims(next_sigma - sigma_hat, x.ndim)
|
|
@@ -632,7 +597,7 @@ class EulerEDMDualSampler(EulerEDMSampler):
|
|
| 632 |
print(f"Local losses: {local_losses}")
|
| 633 |
|
| 634 |
if len(inters) > 0:
|
| 635 |
-
imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.
|
| 636 |
|
| 637 |
return x
|
| 638 |
|
|
|
|
| 7 |
|
| 8 |
import imageio
|
| 9 |
import torch
|
|
|
|
| 10 |
import numpy as np
|
| 11 |
import torch.nn.functional as F
|
| 12 |
from omegaconf import ListConfig, OmegaConf
|
|
|
|
| 251 |
|
| 252 |
return x
|
| 253 |
|
| 254 |
+
def save_segment_map(self, attn_maps, tokens=None, save_name=None):
|
|
|
|
|
|
|
| 255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
sections = []
|
| 257 |
for i in range(len(tokens)):
|
| 258 |
attn_map = attn_maps[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
sections.append(attn_map)
|
| 260 |
|
| 261 |
section = np.stack(sections)
|
| 262 |
+
np.save(f"./temp/seg_map/seg_{save_name}.npy", section)
|
|
|
|
|
|
|
| 263 |
|
| 264 |
def get_init_noise(self, cfgs, model, cond, batch, uc=None):
|
| 265 |
|
|
|
|
| 343 |
local_loss = torch.zeros(1)
|
| 344 |
if save_attn:
|
| 345 |
attn_map = model.model.diffusion_model.save_attn_map(save_name=name, tokens=batch["label"][0])
|
| 346 |
+
self.save_segment_map(attn_map, tokens=batch["label"][0], save_name=name)
|
|
|
|
| 347 |
|
| 348 |
d = to_d(x, sigma_hat, denoised)
|
| 349 |
dt = append_dims(next_sigma - sigma_hat, x.ndim)
|
|
|
|
| 376 |
|
| 377 |
alpha = 20 * np.sqrt(scales[i])
|
| 378 |
update = aae_enabled
|
| 379 |
+
save_loss = aae_enabled
|
| 380 |
save_attn = detailed and (i == (num_sigmas-1)//2)
|
| 381 |
save_inter = aae_enabled
|
| 382 |
|
|
|
|
| 418 |
imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.02)
|
| 419 |
|
| 420 |
return x
|
| 421 |
+
|
| 422 |
|
| 423 |
class EulerEDMDualSampler(EulerEDMSampler):
|
| 424 |
|
|
|
|
| 523 |
else:
|
| 524 |
local_loss = torch.zeros(1)
|
| 525 |
if save_attn:
|
| 526 |
+
attn_map = model.model.diffusion_model.save_attn_map(save_name=name, tokens=batch["label"][0])
|
| 527 |
+
self.save_segment_map(attn_map, tokens=batch["label"][0], save_name=name)
|
|
|
|
| 528 |
|
| 529 |
d = to_d(x, sigma_hat, denoised)
|
| 530 |
dt = append_dims(next_sigma - sigma_hat, x.ndim)
|
|
|
|
| 597 |
print(f"Local losses: {local_losses}")
|
| 598 |
|
| 599 |
if len(inters) > 0:
|
| 600 |
+
imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.02)
|
| 601 |
|
| 602 |
return x
|
| 603 |
|
temp/attn_map/attn_map_1.png
ADDED
|
temp/attn_map/attn_map_10.png
ADDED
|
temp/attn_map/attn_map_11.png
ADDED
|
temp/attn_map/attn_map_12.png
ADDED
|
temp/attn_map/attn_map_13.png
ADDED
|
temp/attn_map/attn_map_14.png
ADDED
|
temp/attn_map/attn_map_15.png
ADDED
|
temp/attn_map/attn_map_16.png
ADDED
|
temp/attn_map/attn_map_17.png
ADDED
|
temp/attn_map/attn_map_18.png
ADDED
|
temp/attn_map/attn_map_19.png
ADDED
|
temp/attn_map/attn_map_2.png
ADDED
|
temp/attn_map/attn_map_20.png
ADDED
|
temp/attn_map/attn_map_21.png
ADDED
|
temp/attn_map/attn_map_22.png
ADDED
|
temp/attn_map/attn_map_23.png
ADDED
|
temp/attn_map/attn_map_24.png
ADDED
|
temp/attn_map/attn_map_25.png
ADDED
|
temp/attn_map/attn_map_26.png
ADDED
|
temp/attn_map/attn_map_27.png
ADDED
|
temp/attn_map/attn_map_28.png
ADDED
|
temp/attn_map/attn_map_29.png
ADDED
|
temp/attn_map/attn_map_3.png
ADDED
|
temp/attn_map/attn_map_4.png
ADDED
|
temp/attn_map/attn_map_5.png
ADDED
|
temp/attn_map/attn_map_6.png
ADDED
|
temp/attn_map/attn_map_7.png
ADDED
|
temp/attn_map/attn_map_8.png
ADDED
|
temp/attn_map/attn_map_9.png
ADDED
|
temp/seg_map/seg_1.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fed32518482697a99ffa93f572123251ff1a1ce344e6c87108c8e6e5344428cb
|
| 3 |
+
size 32896
|
temp/seg_map/seg_1.png
ADDED
|
temp/seg_map/seg_10.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63ad403c856b5eb96cb1512d84fc5b030ea3fc0a043d3a80d3eeb37472cf343e
|
| 3 |
+
size 24704
|
temp/seg_map/seg_11.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2181e1e25621b1dcb55d2e59df2b168d786021936a99e6e954f0825474414714
|
| 3 |
+
size 24704
|
temp/seg_map/seg_12.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3ddb1524d276a3c1bdb101f9d6239adde117f441205ff44c325e96f11c2cef6
|
| 3 |
+
size 20608
|
temp/seg_map/seg_13.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2f93300e573de546ae8ef84db4f49010b31df98561bd658f8da017cf277cad3
|
| 3 |
+
size 24704
|
temp/seg_map/seg_14.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bc4905adb15d7bf5b7b902f7ed553bbf465e0e875ad376e3d75ddd1109ba800
|
| 3 |
+
size 24704
|
temp/seg_map/seg_15.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4f3815fe49562a34bf0de78eacc0c0508300d35b8edbe27bbe3a58f10779747
|
| 3 |
+
size 24704
|
temp/seg_map/seg_16.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c61b774d7ebf02ba7b6bfcda062cdf600f6d39f4d499b72d63bfa4ab566cb7f6
|
| 3 |
+
size 24704
|
temp/seg_map/seg_17.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86f445d11dc69ea7c45cd409637f630abb56859cedd5204cd761f1a75aece7e9
|
| 3 |
+
size 24704
|
temp/seg_map/seg_18.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19ef25b3f6cd2d76d62c96ccaec8f83c315eadbbba8c40cafb37c9a273d9cc8f
|
| 3 |
+
size 24704
|