harsh99 committed on
Commit
76c374a
·
1 Parent(s): f6fbe2a

mask-free model support added.

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitignore +3 -1
  2. .gradio/certificate.pem +31 -0
  3. app.py +430 -0
  4. load_model.py +5 -0
  5. mask-based-output/vitonhd-512/unpaired/00654_00.jpg +0 -0
  6. mask-based-output/vitonhd-512/unpaired/01265_00.jpg +0 -0
  7. mask-based-output/vitonhd-512/unpaired/01985_00.jpg +0 -0
  8. mask-based-output/vitonhd-512/unpaired/02023_00.jpg +0 -0
  9. {output → mask-based-output}/vitonhd-512/unpaired/02532_00.jpg +0 -0
  10. mask-based-output/vitonhd-512/unpaired/02944_00.jpg +0 -0
  11. {output → mask-based-output}/vitonhd-512/unpaired/03191_00.jpg +0 -0
  12. {output → mask-based-output}/vitonhd-512/unpaired/03921_00.jpg +0 -0
  13. mask-based-output/vitonhd-512/unpaired/05006_00.jpg +0 -0
  14. mask-based-output/vitonhd-512/unpaired/05378_00.jpg +0 -0
  15. mask-based-output/vitonhd-512/unpaired/07342_00.jpg +0 -0
  16. mask-based-output/vitonhd-512/unpaired/08088_00.jpg +0 -0
  17. mask-based-output/vitonhd-512/unpaired/08239_00.jpg +0 -0
  18. mask-based-output/vitonhd-512/unpaired/08650_00.jpg +0 -0
  19. mask-based-output/vitonhd-512/unpaired/08839_00.jpg +0 -0
  20. mask-based-output/vitonhd-512/unpaired/11085_00.jpg +0 -0
  21. {output → mask-based-output}/vitonhd-512/unpaired/12345_00.jpg +0 -0
  22. {output → mask-based-output}/vitonhd-512/unpaired/12419_00.jpg +0 -0
  23. {output → mask-based-output}/vitonhd-512/unpaired/12562_00.jpg +0 -0
  24. mask-based-output/vitonhd-512/unpaired/14651_00.jpg +0 -0
  25. mask-free-output/vitonhd-512/unpaired/00654_00.jpg +0 -0
  26. mask-free-output/vitonhd-512/unpaired/01265_00.jpg +0 -0
  27. mask-free-output/vitonhd-512/unpaired/01985_00.jpg +0 -0
  28. mask-free-output/vitonhd-512/unpaired/02023_00.jpg +0 -0
  29. mask-free-output/vitonhd-512/unpaired/02532_00.jpg +0 -0
  30. mask-free-output/vitonhd-512/unpaired/02944_00.jpg +0 -0
  31. mask-free-output/vitonhd-512/unpaired/03191_00.jpg +0 -0
  32. mask-free-output/vitonhd-512/unpaired/03921_00.jpg +0 -0
  33. mask-free-output/vitonhd-512/unpaired/05006_00.jpg +0 -0
  34. mask-free-output/vitonhd-512/unpaired/05378_00.jpg +0 -0
  35. mask-free-output/vitonhd-512/unpaired/07342_00.jpg +0 -0
  36. mask-free-output/vitonhd-512/unpaired/08088_00.jpg +0 -0
  37. mask-free-output/vitonhd-512/unpaired/08239_00.jpg +0 -0
  38. mask-free-output/vitonhd-512/unpaired/08650_00.jpg +0 -0
  39. mask-free-output/vitonhd-512/unpaired/08839_00.jpg +0 -0
  40. mask-free-output/vitonhd-512/unpaired/11085_00.jpg +0 -0
  41. mask-free-output/vitonhd-512/unpaired/12345_00.jpg +0 -0
  42. mask-free-output/vitonhd-512/unpaired/12419_00.jpg +0 -0
  43. mask-free-output/vitonhd-512/unpaired/12562_00.jpg +0 -0
  44. mask-free-output/vitonhd-512/unpaired/14651_00.jpg +0 -0
  45. sample_inference.ipynb → mask_based_inference.ipynb +93 -81
  46. mask_free_inference.ipynb +449 -0
  47. output/vitonhd-512/unpaired/00654_00.jpg +0 -0
  48. output/vitonhd-512/unpaired/01265_00.jpg +0 -0
  49. output/vitonhd-512/unpaired/01985_00.jpg +0 -0
  50. output/vitonhd-512/unpaired/02023_00.jpg +0 -0
.gitignore CHANGED
@@ -1,7 +1,9 @@
  *inkpunk-diffusion-v1.ckpt
+ *instruct-pix2pix*
  *sd-v1-5-inpainting.ckpt
  *zalando-hd-resized.zip
-
+ *finetuned_weights.safetensors
+ *maskfree_finetuned_weights.safetensors
  # *viton-hd-dataset.zip
  viton-hd-dataset/
  checkpoints/
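The new ignore patterns keep the base checkpoints and finetuned attention weights out of version control; they still have to exist locally for the app and notebooks to run. A minimal sketch (filenames taken from this commit, paths assumed relative to the repository root) to check for them:

import os

# Sketch: confirm the git-ignored weight files referenced in this commit exist locally.
# Paths are assumed to be relative to the repository root.
for path in (
    "instruct-pix2pix-00-22000.ckpt",          # base checkpoint for the mask-free model
    "maskfree_finetuned_weights.safetensors",  # finetuned attention weights (mask-free)
    "sd-v1-5-inpainting.ckpt",                 # base checkpoint for the mask-based model
    "finetuned_weights.safetensors",           # finetuned attention weights (mask-based)
):
    print(path, "found" if os.path.exists(path) else "missing - download before running")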
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
app.py ADDED
@@ -0,0 +1,430 @@
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from PIL import Image
5
+ import numpy as np
6
+ from typing import Optional
7
+
8
+ # Import your custom modules
9
+ from load_model import preload_models_from_standard_weights
10
+ from utils import to_pil_image
11
+
12
+ import inspect
13
+ import os
14
+ from typing import Union
15
+
16
+ import PIL
17
+ import numpy as np
18
+ import torch
19
+ import tqdm
20
+ from diffusers.utils.torch_utils import randn_tensor
21
+
22
+ from utils import (check_inputs_maskfree, get_time_embedding, numpy_to_pil, prepare_image, compute_vae_encodings)
23
+ from ddpm import DDPMSampler
24
+
25
+
26
+ class CatVTONPix2PixPipeline:
27
+ def __init__(
28
+ self,
29
+ weight_dtype=torch.float32,
30
+ device='cuda',
31
+ compile=False,
32
+ skip_safety_check=True,
33
+ use_tf32=True,
34
+ models={},
35
+ ):
36
+ self.device = device
37
+ self.weight_dtype = weight_dtype
38
+ self.skip_safety_check = skip_safety_check
39
+ self.models = models
40
+
41
+ self.generator = torch.Generator(device=device)
42
+ self.noise_scheduler = DDPMSampler(generator=self.generator)
43
+ self.encoder= models.get('encoder', None)
44
+ self.decoder= models.get('decoder', None)
45
+ self.unet=models.get('diffusion', None)
46
+
47
+ # Enable TF32 for faster training on Ampere GPUs
48
+ if use_tf32:
49
+ torch.set_float32_matmul_precision("high")
50
+ torch.backends.cuda.matmul.allow_tf32 = True
51
+
52
+ @torch.no_grad()
53
+ def __call__(
54
+ self,
55
+ image: Union[PIL.Image.Image, torch.Tensor],
56
+ condition_image: Union[PIL.Image.Image, torch.Tensor],
57
+ num_inference_steps: int = 50,
58
+ guidance_scale: float = 2.5,
59
+ height: int = 1024,
60
+ width: int = 768,
61
+ generator=None,
62
+ eta=1.0,
63
+ **kwargs
64
+ ):
65
+ concat_dim = -1 # FIXME: y axis concat
66
+ # Prepare inputs to Tensor
67
+ image, condition_image = check_inputs_maskfree(image, condition_image, width, height)
68
+
69
+ # Ensure consistent dtype for all tensors
70
+ image = prepare_image(image).to(self.device, dtype=self.weight_dtype)
71
+ condition_image = prepare_image(condition_image).to(self.device, dtype=self.weight_dtype)
72
+
73
+ # Encode the image
74
+ image_latent = compute_vae_encodings(image, self.encoder)
75
+ condition_latent = compute_vae_encodings(condition_image, self.encoder)
76
+
77
+ del image, condition_image
78
+
79
+ # Concatenate latents
80
+ condition_latent_concat = torch.cat([image_latent, condition_latent], dim=concat_dim)
81
+
82
+ # Prepare noise
83
+ latents = randn_tensor(
84
+ condition_latent_concat.shape,
85
+ generator=generator,
86
+ device=condition_latent_concat.device,
87
+ dtype=self.weight_dtype,
88
+ )
89
+
90
+ # Prepare timesteps
91
+ self.noise_scheduler.set_inference_timesteps(num_inference_steps)
92
+ timesteps = self.noise_scheduler.timesteps
93
+ latents = self.noise_scheduler.add_noise(latents, timesteps[0])
94
+
95
+ # Classifier-Free Guidance
96
+ if do_classifier_free_guidance := (guidance_scale > 1.0):
97
+ condition_latent_concat = torch.cat(
98
+ [
99
+ torch.cat([image_latent, torch.zeros_like(condition_latent)], dim=concat_dim),
100
+ condition_latent_concat,
101
+ ]
102
+ )
103
+
104
+ num_warmup_steps = 0 # For simple DDPM, no warmup needed
105
+ with tqdm.tqdm(total=num_inference_steps) as progress_bar:
106
+ for i, t in enumerate(timesteps):
107
+ # expand the latents if we are doing classifier free guidance
108
+ latent_model_input = (torch.cat([latents] * 2) if do_classifier_free_guidance else latents)
109
+
110
+ # prepare the input for the inpainting model
111
+ p2p_latent_model_input = torch.cat([latent_model_input, condition_latent_concat], dim=1)
112
+
113
+ # predict the noise residual
114
+ timestep = t.repeat(p2p_latent_model_input.shape[0])
115
+ time_embedding = get_time_embedding(timestep).to(self.device, dtype=self.weight_dtype)
116
+
117
+ noise_pred = self.unet(
118
+ p2p_latent_model_input,
119
+ time_embedding
120
+ )
121
+
122
+ # perform guidance
123
+ if do_classifier_free_guidance:
124
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
125
+ noise_pred = noise_pred_uncond + guidance_scale * (
126
+ noise_pred_text - noise_pred_uncond
127
+ )
128
+
129
+ # compute the previous noisy sample x_t -> x_t-1
130
+ latents = self.noise_scheduler.step(
131
+ t, latents, noise_pred
132
+ )
133
+
134
+ # call the callback, if provided
135
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps):
136
+ progress_bar.update()
137
+
138
+ # Decode the final latents
139
+ latents = latents.split(latents.shape[concat_dim] // 2, dim=concat_dim)[0]
140
+ image = self.decoder(latents.to(self.device, dtype=self.weight_dtype))
141
+ image = (image / 2 + 0.5).clamp(0, 1)
142
+ image = image.cpu().permute(0, 2, 3, 1).float().numpy()
143
+ image = numpy_to_pil(image)
144
+
145
+ return image
146
+
147
+ def load_models():
148
+ try:
149
+ print("πŸš€ Starting model loading process...")
150
+
151
+ # Check CUDA availability
152
+ cuda_available = torch.cuda.is_available()
153
+ print(f"CUDA available: {cuda_available}")
154
+ if cuda_available:
155
+ print(f"CUDA device: {torch.cuda.get_device_name()}")
156
+ free_memory = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)
157
+ print(f"Available CUDA memory: {free_memory / 1e9:.2f} GB")
158
+
159
+ device = "cuda" if cuda_available else "cpu"
160
+
161
+ # Check if model files exist
162
+ ckpt_path = "instruct-pix2pix-00-22000.ckpt"
163
+ finetune_path = "maskfree_finetuned_weights.safetensors"
164
+
165
+ if not os.path.exists(ckpt_path):
166
+ print(f"❌ Checkpoint file not found: {ckpt_path}")
167
+ return None, None
168
+
169
+ if not os.path.exists(finetune_path):
170
+ print(f"❌ Finetune weights file not found: {finetune_path}")
171
+ return None, None
172
+
173
+ print("πŸ“¦ Loading models from weights...")
174
+
175
+ models = preload_models_from_standard_weights(
176
+ ckpt_path=ckpt_path,
177
+ device=device,
178
+ finetune_weights_path=finetune_path
179
+ )
180
+
181
+ if not models:
182
+ print("❌ Failed to load models")
183
+ return None, None
184
+
185
+ # Convert all models to consistent dtype to avoid mixed precision issues
186
+ weight_dtype = torch.float32 # Use float32 to avoid dtype mismatch
187
+ print(f"Converting models to {weight_dtype}...")
188
+
189
+ # Ensure all models use the same dtype
190
+ for model_name, model in models.items():
191
+ if model is not None:
192
+ try:
193
+ model = model.to(dtype=weight_dtype)
194
+ models[model_name] = model
195
+ print(f"βœ… {model_name} converted to {weight_dtype}")
196
+ except Exception as e:
197
+ print(f"⚠️ Could not convert {model_name} to {weight_dtype}: {e}")
198
+
199
+ print("πŸ”§ Initializing pipeline...")
200
+
201
+ pipeline = CatVTONPix2PixPipeline(
202
+ weight_dtype=weight_dtype,
203
+ device=device,
204
+ skip_safety_check=True,
205
+ models=models,
206
+ )
207
+
208
+ print("βœ… Models and pipeline loaded successfully!")
209
+ return models, pipeline
210
+
211
+ except Exception as e:
212
+ print(f"❌ Error in load_models: {e}")
213
+ import traceback
214
+ traceback.print_exc()
215
+ return None, None
216
+
217
+ def person_example_fn(image_path):
218
+ """Handle person image examples"""
219
+ if image_path:
220
+ return image_path
221
+ return None
222
+
223
+ def create_demo(pipeline=None):
224
+ """Create the Gradio interface"""
225
+
226
+ def submit_function_p2p(
227
+ person_image_path: Optional[str],
228
+ cloth_image_path: Optional[str],
229
+ num_inference_steps: int = 50,
230
+ guidance_scale: float = 2.5,
231
+ seed: int = 42,
232
+ ) -> Optional[Image.Image]:
233
+ """Process virtual try-on inference"""
234
+
235
+ try:
236
+ if not person_image_path or not cloth_image_path:
237
+ gr.Warning("Please upload both person and cloth images!")
238
+ return None
239
+
240
+ if not os.path.exists(person_image_path):
241
+ gr.Error("Person image file not found!")
242
+ return None
243
+
244
+ if not os.path.exists(cloth_image_path):
245
+ gr.Error("Cloth image file not found!")
246
+ return None
247
+
248
+ if pipeline is None:
249
+ gr.Error("Models not loaded! Please restart the application.")
250
+ return None
251
+
252
+ # Load images
253
+ try:
254
+ person_image = Image.open(person_image_path).convert('RGB')
255
+ cloth_image = Image.open(cloth_image_path).convert('RGB')
256
+ except Exception as e:
257
+ gr.Error(f"Error loading images: {str(e)}")
258
+ return None
259
+
260
+ # Set up generator
261
+ generator = torch.Generator(device=pipeline.device)
262
+ if seed != -1:
263
+ generator.manual_seed(seed)
264
+
265
+ print("πŸ”„ Processing virtual try-on...")
266
+
267
+ # Run inference
268
+ with torch.no_grad():
269
+ results = pipeline(
270
+ person_image,
271
+ cloth_image,
272
+ num_inference_steps=num_inference_steps,
273
+ guidance_scale=guidance_scale,
274
+ height=512,
275
+ width=384,
276
+ generator=generator,
277
+ )
278
+
279
+ # Process results
280
+ if isinstance(results, list) and len(results) > 0:
281
+ result = results[0]
282
+ else:
283
+ result = results
284
+
285
+ return result
286
+
287
+ except Exception as e:
288
+ print(f"❌ Error in submit_function_p2p: {e}")
289
+ import traceback
290
+ traceback.print_exc()
291
+ gr.Error(f"Error during inference: {str(e)}")
292
+ return None
293
+
294
+ # Custom CSS for better styling
295
+ css = """
296
+ .gradio-container {
297
+ max-width: 1200px !important;
298
+ }
299
+ .image-container {
300
+ max-height: 600px;
301
+ }
302
+ """
303
+
304
+ with gr.Blocks(css=css, title="Virtual Try-On") as demo:
305
+ gr.HTML("""
306
+ <div style="text-align: center; margin-bottom: 20px;">
307
+ <h1>πŸ§₯ Virtual Try-On with CatVTON</h1>
308
+ <p>Upload a person image and a clothing item to see how they look together!</p>
309
+ </div>
310
+ """)
311
+
312
+ with gr.Tab("Mask-Free Virtual Try-On"):
313
+ with gr.Row():
314
+ with gr.Column(scale=1, min_width=350):
315
+ with gr.Row():
316
+ image_path_p2p = gr.Image(
317
+ type="filepath",
318
+ interactive=True,
319
+ visible=False,
320
+ )
321
+ person_image_p2p = gr.Image(
322
+ interactive=True,
323
+ label="Person Image",
324
+ type="filepath",
325
+ elem_classes=["image-container"]
326
+ )
327
+
328
+ with gr.Row():
329
+ cloth_image_p2p = gr.Image(
330
+ interactive=True,
331
+ label="Clothing Image",
332
+ type="filepath",
333
+ elem_classes=["image-container"]
334
+ )
335
+
336
+ submit_p2p = gr.Button("✨ Generate Try-On", variant="primary", size="lg")
337
+
338
+ gr.Markdown(
339
+ '<center><span style="color: #FF6B6B; font-weight: bold;">⚠️ Click only once and wait for processing!</span></center>'
340
+ )
341
+
342
+ with gr.Accordion("πŸ”§ Advanced Options", open=False):
343
+ num_inference_steps_p2p = gr.Slider(
344
+ label="Inference Steps",
345
+ minimum=10,
346
+ maximum=100,
347
+ step=5,
348
+ value=50,
349
+ info="More steps = better quality but slower"
350
+ )
351
+ guidance_scale_p2p = gr.Slider(
352
+ label="Guidance Scale",
353
+ minimum=0.0,
354
+ maximum=7.5,
355
+ step=0.5,
356
+ value=2.5,
357
+ info="Higher values = stronger conditioning"
358
+ )
359
+ seed_p2p = gr.Slider(
360
+ label="Seed",
361
+ minimum=-1,
362
+ maximum=10000,
363
+ step=1,
364
+ value=42,
365
+ info="Use -1 for random seed"
366
+ )
367
+
368
+ with gr.Column(scale=2, min_width=500):
369
+ result_image_p2p = gr.Image(
370
+ interactive=False,
371
+ label="Result (Person | Clothing | Generated)",
372
+ elem_classes=["image-container"]
373
+ )
374
+
375
+ gr.Markdown("""
376
+ ### πŸ“‹ Instructions:
377
+ 1. Upload a **person image** (front-facing works best)
378
+ 2. Upload a **clothing item** you want to try on
379
+ 3. Adjust advanced settings if needed
380
+ 4. Click "Generate Try-On" and wait
381
+
382
+ ### πŸ’‘ Tips:
383
+ - Use clear, high-resolution images
384
+ - Person should be facing forward
385
+ - Clothing items work best when laid flat or on a model
386
+ - Try different seeds if you're not satisfied with results
387
+ """)
388
+
389
+ # Event handlers
390
+ image_path_p2p.change(
391
+ person_example_fn,
392
+ inputs=image_path_p2p,
393
+ outputs=person_image_p2p
394
+ )
395
+
396
+ submit_p2p.click(
397
+ submit_function_p2p,
398
+ inputs=[
399
+ person_image_p2p,
400
+ cloth_image_p2p,
401
+ num_inference_steps_p2p,
402
+ guidance_scale_p2p,
403
+ seed_p2p,
404
+ ],
405
+ outputs=result_image_p2p,
406
+ )
407
+
408
+ return demo
409
+
410
+ def app_gradio():
411
+ """Main application function"""
412
+
413
+ # Load models at startup
414
+ print("πŸš€ Loading models...")
415
+ models, pipeline = load_models()
416
+ if not models or not pipeline:
417
+ print("❌ Failed to load models. Please check your model files.")
418
+ return
419
+
420
+ # Create and launch demo
421
+ demo = create_demo(pipeline=pipeline)
422
+ demo.launch(
423
+ share=True,
424
+ show_error=True,
425
+ server_name="0.0.0.0",
426
+ server_port=7860
427
+ )
428
+
429
+ if __name__ == "__main__":
430
+ app_gradio()
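The Gradio app above can also be driven without the UI. A minimal sketch, assuming the module layout from this commit (load_model.py and app.py importable, weights already downloaded); person.jpg and cloth.jpg are placeholder paths, not files in this repo:

import torch
from PIL import Image
from load_model import preload_models_from_standard_weights
from app import CatVTONPix2PixPipeline

# Load the mask-free backbone plus finetuned attention weights (same call app.py makes).
models = preload_models_from_standard_weights(
    ckpt_path="instruct-pix2pix-00-22000.ckpt",
    device="cuda",
    finetune_weights_path="maskfree_finetuned_weights.safetensors",
)
pipeline = CatVTONPix2PixPipeline(weight_dtype=torch.float32, device="cuda", models=models)

person = Image.open("person.jpg").convert("RGB")   # placeholder input path
cloth = Image.open("cloth.jpg").convert("RGB")     # placeholder input path
result = pipeline(
    person, cloth,
    num_inference_steps=50, guidance_scale=2.5,
    height=512, width=384,
    generator=torch.Generator(device="cuda").manual_seed(42),
)[0]                                               # numpy_to_pil returns a list of PIL images
result.save("tryon_result.jpg")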
load_model.py CHANGED
@@ -78,7 +78,12 @@ def load_finetuned_attention_weights(finetune_weights_path, diffusion, device):
 
  def preload_models_from_standard_weights(ckpt_path, device, finetune_weights_path=None):
      # CatVTON parameters
+     # in_channels: 8 for instruct-pix2pix (mask-free), 9 for sd-v1-5-inpainting (mask-based)
      in_channels = 9
+
+     if 'maskfree' in finetune_weights_path or 'mask_free' in finetune_weights_path:
+         in_channels = 8
+
      out_channels = 4
 
      state_dict=model_converter.load_from_standard_weights(ckpt_path, device)
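The in_channels switch reflects what each UNet sees on the channel axis: the mask-free instruct-pix2pix backbone gets the 4-channel noisy latent concatenated with the 4-channel condition latent (8 total), while the mask-based sd-v1-5-inpainting backbone also receives a mask channel (9 total). An illustrative sketch of the channel bookkeeping only; the assumed shapes correspond to a 512x384 input with person and garment concatenated along width:

import torch

noisy_latent = torch.randn(1, 4, 64, 96)  # VAE latent of the (person | garment) canvas
condition    = torch.randn(1, 4, 64, 96)  # condition latent concatenated in the pipeline
mask         = torch.randn(1, 1, 64, 96)  # 1-channel mask (mask-based variant only, assumed here)

maskfree_input  = torch.cat([noisy_latent, condition], dim=1)        # 8 channels -> instruct-pix2pix UNet
maskbased_input = torch.cat([noisy_latent, condition, mask], dim=1)  # 9 channels -> sd-v1-5-inpainting UNet
print(maskfree_input.shape[1], maskbased_input.shape[1])             # 8 9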
mask-based-output/vitonhd-512/unpaired/00654_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/01265_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/01985_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/02023_00.jpg ADDED
{output → mask-based-output}/vitonhd-512/unpaired/02532_00.jpg RENAMED
File without changes
mask-based-output/vitonhd-512/unpaired/02944_00.jpg ADDED
{output → mask-based-output}/vitonhd-512/unpaired/03191_00.jpg RENAMED
File without changes
{output → mask-based-output}/vitonhd-512/unpaired/03921_00.jpg RENAMED
File without changes
mask-based-output/vitonhd-512/unpaired/05006_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/05378_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/07342_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/08088_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/08239_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/08650_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/08839_00.jpg ADDED
mask-based-output/vitonhd-512/unpaired/11085_00.jpg ADDED
{output → mask-based-output}/vitonhd-512/unpaired/12345_00.jpg RENAMED
File without changes
{output → mask-based-output}/vitonhd-512/unpaired/12419_00.jpg RENAMED
File without changes
{output → mask-based-output}/vitonhd-512/unpaired/12562_00.jpg RENAMED
File without changes
mask-based-output/vitonhd-512/unpaired/14651_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/00654_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/01265_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/01985_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/02023_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/02532_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/02944_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/03191_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/03921_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/05006_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/05378_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/07342_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/08088_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/08239_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/08650_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/08839_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/11085_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/12345_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/12419_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/12562_00.jpg ADDED
mask-free-output/vitonhd-512/unpaired/14651_00.jpg ADDED
sample_inference.ipynb → mask_based_inference.ipynb RENAMED
@@ -28,6 +28,76 @@
28
  {
29
  "cell_type": "code",
30
  "execution_count": 2,
31
  "id": "bab24c29",
32
  "metadata": {},
33
  "outputs": [
@@ -183,77 +253,7 @@
183
  },
184
  {
185
  "cell_type": "code",
186
- "execution_count": 3,
187
- "id": "a069151e",
188
- "metadata": {},
189
- "outputs": [
190
- {
191
- "name": "stdout",
192
- "output_type": "stream",
193
- "text": [
194
- "Loaded finetuned weights from finetuned_weights.safetensors\n",
195
- "Loading 0.in_proj.weight\n",
196
- "Loading 0.out_proj.weight\n",
197
- "Loading 0.out_proj.bias\n",
198
- "Loading 8.in_proj.weight\n",
199
- "Loading 8.out_proj.weight\n",
200
- "Loading 8.out_proj.bias\n",
201
- "Loading 16.in_proj.weight\n",
202
- "Loading 16.out_proj.weight\n",
203
- "Loading 16.out_proj.bias\n",
204
- "Loading 24.in_proj.weight\n",
205
- "Loading 24.out_proj.weight\n",
206
- "Loading 24.out_proj.bias\n",
207
- "Loading 32.in_proj.weight\n",
208
- "Loading 32.out_proj.weight\n",
209
- "Loading 32.out_proj.bias\n",
210
- "Loading 40.in_proj.weight\n",
211
- "Loading 40.out_proj.weight\n",
212
- "Loading 40.out_proj.bias\n",
213
- "Loading 48.in_proj.weight\n",
214
- "Loading 48.out_proj.weight\n",
215
- "Loading 48.out_proj.bias\n",
216
- "Loading 56.in_proj.weight\n",
217
- "Loading 56.out_proj.weight\n",
218
- "Loading 56.out_proj.bias\n",
219
- "Loading 64.in_proj.weight\n",
220
- "Loading 64.out_proj.weight\n",
221
- "Loading 64.out_proj.bias\n",
222
- "Loading 72.in_proj.weight\n",
223
- "Loading 72.out_proj.weight\n",
224
- "Loading 72.out_proj.bias\n",
225
- "Loading 80.in_proj.weight\n",
226
- "Loading 80.out_proj.weight\n",
227
- "Loading 80.out_proj.bias\n",
228
- "Loading 88.in_proj.weight\n",
229
- "Loading 88.out_proj.weight\n",
230
- "Loading 88.out_proj.bias\n",
231
- "Loading 96.in_proj.weight\n",
232
- "Loading 96.out_proj.weight\n",
233
- "Loading 96.out_proj.bias\n",
234
- "Loading 104.in_proj.weight\n",
235
- "Loading 104.out_proj.weight\n",
236
- "Loading 104.out_proj.bias\n",
237
- "Loading 112.in_proj.weight\n",
238
- "Loading 112.out_proj.weight\n",
239
- "Loading 112.out_proj.bias\n",
240
- "Loading 120.in_proj.weight\n",
241
- "Loading 120.out_proj.weight\n",
242
- "Loading 120.out_proj.bias\n",
243
- "\n",
244
- "Attention module weights loaded from {finetune_weights_path} successfully.\n"
245
- ]
246
- }
247
- ],
248
- "source": [
249
- "import load_model\n",
250
- "\n",
251
- "models=load_model.preload_models_from_standard_weights(ckpt_path=\"sd-v1-5-inpainting.ckpt\", device=\"cuda\", finetune_weights_path=\"finetuned_weights.safetensors\")"
252
- ]
253
- },
254
- {
255
- "cell_type": "code",
256
- "execution_count": null,
257
  "id": "a729bf46",
258
  "metadata": {},
259
  "outputs": [
@@ -268,15 +268,27 @@
268
  "name": "stderr",
269
  "output_type": "stream",
270
  "text": [
271
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:11<00:00, 4.48it/s]\n",
272
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:10<00:00, 4.55it/s]\n",
273
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:11<00:00, 4.43it/s]\n",
274
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:11<00:00, 4.53it/s]\n",
275
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:11<00:00, 4.53it/s]\n",
276
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:11<00:00, 4.51it/s]\n",
277
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:10<00:00, 4.57it/s]\n",
278
- "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:11<00:00, 4.51it/s]\n",
279
- " 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 8/20 [01:32<02:17, 11.49s/it]"
280
  ]
281
  }
282
  ],
@@ -299,7 +311,7 @@
299
  " args.__dict__= {\n",
300
  " \"dataset_name\": \"vitonhd\",\n",
301
  " \"data_root_path\": \"./sample_dataset\",\n",
302
- " \"output_dir\": \"./output\",\n",
303
  " \"seed\": 555,\n",
304
  " \"batch_size\": 1,\n",
305
  " \"num_inference_steps\": 50,\n",
 
28
  {
29
  "cell_type": "code",
30
  "execution_count": 2,
31
+ "id": "24bd99d5",
32
+ "metadata": {},
33
+ "outputs": [
34
+ {
35
+ "name": "stdout",
36
+ "output_type": "stream",
37
+ "text": [
38
+ "Loaded finetuned weights from finetuned_weights.safetensors\n",
39
+ "Loading 0.in_proj.weight\n",
40
+ "Loading 0.out_proj.weight\n",
41
+ "Loading 0.out_proj.bias\n",
42
+ "Loading 8.in_proj.weight\n",
43
+ "Loading 8.out_proj.weight\n",
44
+ "Loading 8.out_proj.bias\n",
45
+ "Loading 16.in_proj.weight\n",
46
+ "Loading 16.out_proj.weight\n",
47
+ "Loading 16.out_proj.bias\n",
48
+ "Loading 24.in_proj.weight\n",
49
+ "Loading 24.out_proj.weight\n",
50
+ "Loading 24.out_proj.bias\n",
51
+ "Loading 32.in_proj.weight\n",
52
+ "Loading 32.out_proj.weight\n",
53
+ "Loading 32.out_proj.bias\n",
54
+ "Loading 40.in_proj.weight\n",
55
+ "Loading 40.out_proj.weight\n",
56
+ "Loading 40.out_proj.bias\n",
57
+ "Loading 48.in_proj.weight\n",
58
+ "Loading 48.out_proj.weight\n",
59
+ "Loading 48.out_proj.bias\n",
60
+ "Loading 56.in_proj.weight\n",
61
+ "Loading 56.out_proj.weight\n",
62
+ "Loading 56.out_proj.bias\n",
63
+ "Loading 64.in_proj.weight\n",
64
+ "Loading 64.out_proj.weight\n",
65
+ "Loading 64.out_proj.bias\n",
66
+ "Loading 72.in_proj.weight\n",
67
+ "Loading 72.out_proj.weight\n",
68
+ "Loading 72.out_proj.bias\n",
69
+ "Loading 80.in_proj.weight\n",
70
+ "Loading 80.out_proj.weight\n",
71
+ "Loading 80.out_proj.bias\n",
72
+ "Loading 88.in_proj.weight\n",
73
+ "Loading 88.out_proj.weight\n",
74
+ "Loading 88.out_proj.bias\n",
75
+ "Loading 96.in_proj.weight\n",
76
+ "Loading 96.out_proj.weight\n",
77
+ "Loading 96.out_proj.bias\n",
78
+ "Loading 104.in_proj.weight\n",
79
+ "Loading 104.out_proj.weight\n",
80
+ "Loading 104.out_proj.bias\n",
81
+ "Loading 112.in_proj.weight\n",
82
+ "Loading 112.out_proj.weight\n",
83
+ "Loading 112.out_proj.bias\n",
84
+ "Loading 120.in_proj.weight\n",
85
+ "Loading 120.out_proj.weight\n",
86
+ "Loading 120.out_proj.bias\n",
87
+ "\n",
88
+ "Attention module weights loaded from {finetune_weights_path} successfully.\n"
89
+ ]
90
+ }
91
+ ],
92
+ "source": [
93
+ "import load_model\n",
94
+ "\n",
95
+ "models=load_model.preload_models_from_standard_weights(ckpt_path=\"sd-v1-5-inpainting.ckpt\", device=\"cuda\", finetune_weights_path=\"finetuned_weights.safetensors\")"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": 3,
101
  "id": "bab24c29",
102
  "metadata": {},
103
  "outputs": [
 
253
  },
254
  {
255
  "cell_type": "code",
256
+ "execution_count": 4,
257
  "id": "a729bf46",
258
  "metadata": {},
259
  "outputs": [
 
268
  "name": "stderr",
269
  "output_type": "stream",
270
  "text": [
271
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.04it/s]\n",
272
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.32it/s]\n",
273
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.01it/s]\n",
274
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.82it/s]\n",
275
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.86it/s]\n",
276
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.25it/s]\n",
277
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.24it/s]\n",
278
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.89it/s]\n",
279
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.90it/s]\n",
280
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.02it/s]\n",
281
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.40it/s]\n",
282
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.15it/s]\n",
283
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.79it/s]\n",
284
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.07it/s]\n",
285
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.14it/s]\n",
286
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.32it/s]\n",
287
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.13it/s]\n",
288
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.05it/s]\n",
289
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.06it/s]\n",
290
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.09it/s]\n",
291
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [02:28<00:00, 7.40s/it]\n"
292
  ]
293
  }
294
  ],
 
311
  " args.__dict__= {\n",
312
  " \"dataset_name\": \"vitonhd\",\n",
313
  " \"data_root_path\": \"./sample_dataset\",\n",
314
+ " \"output_dir\": \"./mask-based-output\",\n",
315
  " \"seed\": 555,\n",
316
  " \"batch_size\": 1,\n",
317
  " \"num_inference_steps\": 50,\n",
mask_free_inference.ipynb ADDED
@@ -0,0 +1,449 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "6d50f66c",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Model already downloaded.\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "# check if the model is downloaded, if not download it\n",
19
+ "import os\n",
20
+ "if not os.path.exists(\"instruct-pix2pix-00-22000.ckpt\"):\n",
21
+ " !wget https://huggingface.co/timbrooks/instruct-pix2pix/resolve/main/instruct-pix2pix-00-22000.ckpt\n",
22
+ "else:\n",
23
+ " print(\"Model already downloaded.\")"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 2,
29
+ "id": "3598a305",
30
+ "metadata": {},
31
+ "outputs": [
32
+ {
33
+ "name": "stdout",
34
+ "output_type": "stream",
35
+ "text": [
36
+ "Loaded finetuned weights from maskfree_finetuned_weights.safetensors\n",
37
+ "Loading 0.in_proj.weight\n",
38
+ "Loading 0.out_proj.weight\n",
39
+ "Loading 0.out_proj.bias\n",
40
+ "Loading 8.in_proj.weight\n",
41
+ "Loading 8.out_proj.weight\n",
42
+ "Loading 8.out_proj.bias\n",
43
+ "Loading 16.in_proj.weight\n",
44
+ "Loading 16.out_proj.weight\n",
45
+ "Loading 16.out_proj.bias\n",
46
+ "Loading 24.in_proj.weight\n",
47
+ "Loading 24.out_proj.weight\n",
48
+ "Loading 24.out_proj.bias\n",
49
+ "Loading 32.in_proj.weight\n",
50
+ "Loading 32.out_proj.weight\n",
51
+ "Loading 32.out_proj.bias\n",
52
+ "Loading 40.in_proj.weight\n",
53
+ "Loading 40.out_proj.weight\n",
54
+ "Loading 40.out_proj.bias\n",
55
+ "Loading 48.in_proj.weight\n",
56
+ "Loading 48.out_proj.weight\n",
57
+ "Loading 48.out_proj.bias\n",
58
+ "Loading 56.in_proj.weight\n",
59
+ "Loading 56.out_proj.weight\n",
60
+ "Loading 56.out_proj.bias\n",
61
+ "Loading 64.in_proj.weight\n",
62
+ "Loading 64.out_proj.weight\n",
63
+ "Loading 64.out_proj.bias\n",
64
+ "Loading 72.in_proj.weight\n",
65
+ "Loading 72.out_proj.weight\n",
66
+ "Loading 72.out_proj.bias\n",
67
+ "Loading 80.in_proj.weight\n",
68
+ "Loading 80.out_proj.weight\n",
69
+ "Loading 80.out_proj.bias\n",
70
+ "Loading 88.in_proj.weight\n",
71
+ "Loading 88.out_proj.weight\n",
72
+ "Loading 88.out_proj.bias\n",
73
+ "Loading 96.in_proj.weight\n",
74
+ "Loading 96.out_proj.weight\n",
75
+ "Loading 96.out_proj.bias\n",
76
+ "Loading 104.in_proj.weight\n",
77
+ "Loading 104.out_proj.weight\n",
78
+ "Loading 104.out_proj.bias\n",
79
+ "Loading 112.in_proj.weight\n",
80
+ "Loading 112.out_proj.weight\n",
81
+ "Loading 112.out_proj.bias\n",
82
+ "Loading 120.in_proj.weight\n",
83
+ "Loading 120.out_proj.weight\n",
84
+ "Loading 120.out_proj.bias\n",
85
+ "\n",
86
+ "Attention module weights loaded from {finetune_weights_path} successfully.\n"
87
+ ]
88
+ }
89
+ ],
90
+ "source": [
91
+ "import load_model\n",
92
+ "\n",
93
+ "models=load_model.preload_models_from_standard_weights(ckpt_path=\"instruct-pix2pix-00-22000.ckpt\", device=\"cuda\", finetune_weights_path=\"maskfree_finetuned_weights.safetensors\")"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 3,
99
+ "id": "78e3d8b9",
100
+ "metadata": {},
101
+ "outputs": [
102
+ {
103
+ "name": "stderr",
104
+ "output_type": "stream",
105
+ "text": [
106
+ "/home/mahesh/miniconda3/envs/harsh/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
107
+ " from .autonotebook import tqdm as notebook_tqdm\n"
108
+ ]
109
+ }
110
+ ],
111
+ "source": [
112
+ "import inspect\n",
113
+ "import os\n",
114
+ "from typing import Union\n",
115
+ "\n",
116
+ "import PIL\n",
117
+ "import numpy as np\n",
118
+ "import torch\n",
119
+ "import tqdm\n",
120
+ "from diffusers.utils.torch_utils import randn_tensor\n",
121
+ "\n",
122
+ "from utils import (check_inputs_maskfree, get_time_embedding, numpy_to_pil, prepare_image, compute_vae_encodings)\n",
123
+ "from ddpm import DDPMSampler\n",
124
+ "\n",
125
+ "class CatVTONPix2PixPipeline:\n",
126
+ " def __init__(\n",
127
+ " self, \n",
128
+ " weight_dtype=torch.float32,\n",
129
+ " device='cuda',\n",
130
+ " compile=False,\n",
131
+ " skip_safety_check=True,\n",
132
+ " use_tf32=True,\n",
133
+ " models={},\n",
134
+ " ):\n",
135
+ " self.device = device\n",
136
+ " self.weight_dtype = weight_dtype\n",
137
+ " self.skip_safety_check = skip_safety_check\n",
138
+ " self.models = models\n",
139
+ "\n",
140
+ " self.generator = torch.Generator(device=device)\n",
141
+ " self.noise_scheduler = DDPMSampler(generator=self.generator)\n",
142
+ " # self.vae = AutoencoderKL.from_pretrained(\"stabilityai/sd-vae-ft-mse\").to(device, dtype=weight_dtype)\n",
143
+ " self.encoder= models.get('encoder', None)\n",
144
+ " self.decoder= models.get('decoder', None)\n",
145
+ " \n",
146
+ " self.unet=models.get('diffusion', None) \n",
147
+ " # # Enable TF32 for faster training on Ampere GPUs (A100 and RTX 30 series).\n",
148
+ " if use_tf32:\n",
149
+ " torch.set_float32_matmul_precision(\"high\")\n",
150
+ " torch.backends.cuda.matmul.allow_tf32 = True\n",
151
+ "\n",
152
+ " @torch.no_grad()\n",
153
+ " def __call__(\n",
154
+ " self, \n",
155
+ " image: Union[PIL.Image.Image, torch.Tensor],\n",
156
+ " condition_image: Union[PIL.Image.Image, torch.Tensor],\n",
157
+ " num_inference_steps: int = 50,\n",
158
+ " guidance_scale: float = 2.5,\n",
159
+ " height: int = 1024,\n",
160
+ " width: int = 768,\n",
161
+ " generator=None,\n",
162
+ " eta=1.0,\n",
163
+ " **kwargs\n",
164
+ " ):\n",
165
+ " concat_dim = -1 # FIXME: y axis concat\n",
166
+ " # Prepare inputs to Tensor\n",
167
+ " image, condition_image = check_inputs_maskfree(image, condition_image, width, height)\n",
168
+ " \n",
169
+ " image = prepare_image(image).to(self.device, dtype=self.weight_dtype)\n",
170
+ " \n",
171
+ " condition_image = prepare_image(condition_image).to(self.device, dtype=self.weight_dtype)\n",
172
+ " \n",
173
+ " # Encode the image\n",
174
+ " image_latent = compute_vae_encodings(image, self.encoder)\n",
175
+ " condition_latent = compute_vae_encodings(condition_image, self.encoder)\n",
176
+ " \n",
177
+ " del image, condition_image\n",
178
+ " # Concatenate latents\n",
179
+ " # Concatenate latents\n",
180
+ " condition_latent_concat = torch.cat([image_latent, condition_latent], dim=concat_dim)\n",
181
+ " # Prepare noise\n",
182
+ " latents = randn_tensor(\n",
183
+ " condition_latent_concat.shape,\n",
184
+ " generator=generator,\n",
185
+ " device=condition_latent_concat.device,\n",
186
+ " dtype=self.weight_dtype,\n",
187
+ " )\n",
188
+ " # Prepare timesteps\n",
189
+ " self.noise_scheduler.set_inference_timesteps(num_inference_steps)\n",
190
+ " timesteps = self.noise_scheduler.timesteps\n",
191
+ " # latents = latents * self.noise_scheduler.init_noise_sigma\n",
192
+ " latents = self.noise_scheduler.add_noise(latents, timesteps[0])\n",
193
+ " \n",
194
+ " # Classifier-Free Guidance\n",
195
+ " if do_classifier_free_guidance := (guidance_scale > 1.0):\n",
196
+ " condition_latent_concat = torch.cat(\n",
197
+ " [\n",
198
+ " torch.cat([image_latent, torch.zeros_like(condition_latent)], dim=concat_dim),\n",
199
+ " condition_latent_concat,\n",
200
+ " ]\n",
201
+ " )\n",
202
+ "\n",
203
+ " num_warmup_steps = 0 # For simple DDPM, no warmup needed\n",
204
+ " with tqdm(total=num_inference_steps) as progress_bar:\n",
205
+ " for i, t in enumerate(timesteps):\n",
206
+ " # expand the latents if we are doing classifier free guidance\n",
207
+ " \n",
208
+ " latent_model_input = (torch.cat([latents] * 2) if do_classifier_free_guidance else latents)\n",
209
+ "\n",
210
+ " # prepare the input for the inpainting model\n",
211
+ " \n",
212
+ " p2p_latent_model_input = torch.cat([latent_model_input, condition_latent_concat], dim=1)\n",
213
+ " # predict the noise residual\n",
214
+ " \n",
215
+ " timestep = t.repeat(p2p_latent_model_input.shape[0])\n",
216
+ " time_embedding = get_time_embedding(timestep).to(self.device, dtype=self.weight_dtype)\n",
217
+ "\n",
218
+ " noise_pred = self.unet(\n",
219
+ " p2p_latent_model_input,\n",
220
+ " time_embedding\n",
221
+ " )\n",
222
+ " # perform guidance\n",
223
+ " if do_classifier_free_guidance:\n",
224
+ " noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)\n",
225
+ " noise_pred = noise_pred_uncond + guidance_scale * (\n",
226
+ " noise_pred_text - noise_pred_uncond\n",
227
+ " )\n",
228
+ " # compute the previous noisy sample x_t -> x_t-1\n",
229
+ " latents = self.noise_scheduler.step(\n",
230
+ " t, latents, noise_pred\n",
231
+ " )\n",
232
+ " # call the callback, if provided\n",
233
+ " if i == len(timesteps) - 1 or (\n",
234
+ " (i + 1) > num_warmup_steps\n",
235
+ " ):\n",
236
+ " progress_bar.update()\n",
237
+ "\n",
238
+ " # Decode the final latents\n",
239
+ " latents = latents.split(latents.shape[concat_dim] // 2, dim=concat_dim)[0]\n",
240
+ " # latents = 1 / self.vae.config.scaling_factor * latents\n",
241
+ " # image = self.vae.decode(latents.to(self.device, dtype=self.weight_dtype)).sample\n",
242
+ " image = self.decoder(latents.to(self.device, dtype=self.weight_dtype))\n",
243
+ " image = (image / 2 + 0.5).clamp(0, 1)\n",
244
+ " # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16\n",
245
+ " image = image.cpu().permute(0, 2, 3, 1).float().numpy()\n",
246
+ " image = numpy_to_pil(image)\n",
247
+ " \n",
248
+ " return image\n"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 4,
254
+ "id": "5627b2d2",
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "name": "stdout",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "Dataset vitonhd loaded, total 20 pairs.\n"
262
+ ]
263
+ },
264
+ {
265
+ "name": "stderr",
266
+ "output_type": "stream",
267
+ "text": [
268
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.12it/s]\n",
269
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.31it/s]\n",
270
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.09it/s]\n",
271
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.98it/s]\n",
272
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.01it/s]\n",
273
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.13it/s]\n",
274
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.28it/s]\n",
275
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 7.13it/s]\n",
276
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.17it/s]\n",
277
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.97it/s]\n",
278
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.17it/s]\n",
279
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.38it/s]\n",
280
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.20it/s]\n",
281
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.92it/s]\n",
282
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.71it/s]\n",
283
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.25it/s]\n",
284
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:06<00:00, 7.49it/s]\n",
285
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.87it/s]\n",
286
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.89it/s]\n",
287
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:07<00:00, 6.92it/s]\n",
288
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [02:26<00:00, 7.35s/it]\n"
289
+ ]
290
+ }
291
+ ],
292
+ "source": [
293
+ "import os\n",
294
+ "import torch\n",
295
+ "import argparse\n",
296
+ "from torch.utils.data import DataLoader\n",
297
+ "from VITON_Dataset import VITONHDTestDataset\n",
298
+ "from tqdm import tqdm\n",
299
+ "from PIL import Image\n",
300
+ "\n",
301
+ "from utils import to_pil_image\n",
302
+ "\n",
303
+ "@torch.no_grad()\n",
304
+ "def main():\n",
305
+ " args=argparse.Namespace()\n",
306
+ " args.__dict__= {\n",
307
+ " \"dataset_name\": \"vitonhd\",\n",
308
+ " \"data_root_path\": \"./sample_dataset\",\n",
309
+ " \"output_dir\": \"./mask-free-output\",\n",
310
+ " \"seed\": 555,\n",
311
+ " \"batch_size\": 1,\n",
312
+ " \"num_inference_steps\": 50,\n",
313
+ " \"guidance_scale\": 2.5,\n",
314
+ " \"width\": 384,\n",
315
+ " \"height\": 512,\n",
316
+ " \"eval_pair\": False,\n",
317
+ " \"concat_eval_results\": True,\n",
318
+ " \"allow_tf32\": True,\n",
319
+ " \"dataloader_num_workers\": 4,\n",
320
+ " \"mixed_precision\": 'no',\n",
321
+ " \"concat_axis\": 'y',\n",
322
+ " \"enable_condition_noise\": True,\n",
323
+ " \"is_train\": False\n",
324
+ " }\n",
325
+ "\n",
326
+ " # Pipeline\n",
327
+ " pipeline = CatVTONPix2PixPipeline(\n",
328
+ " weight_dtype={\n",
329
+ " \"no\": torch.float32,\n",
330
+ " \"fp16\": torch.float16,\n",
331
+ " \"bf16\": torch.bfloat16,\n",
332
+ " }[args.mixed_precision],\n",
333
+ " device=\"cuda\",\n",
334
+ " skip_safety_check=True,\n",
335
+ " models=models,\n",
336
+ " )\n",
337
+ " # Dataset\n",
338
+ " if args.dataset_name == \"vitonhd\":\n",
339
+ " dataset = VITONHDTestDataset(args)\n",
340
+ " else:\n",
341
+ " raise ValueError(f\"Invalid dataset name {args.dataset}.\")\n",
342
+ " print(f\"Dataset {args.dataset_name} loaded, total {len(dataset)} pairs.\")\n",
343
+ " dataloader = DataLoader(\n",
344
+ " dataset,\n",
345
+ " batch_size=args.batch_size,\n",
346
+ " shuffle=False,\n",
347
+ " num_workers=args.dataloader_num_workers\n",
348
+ " )\n",
349
+ " \n",
350
+ " # Inference\n",
351
+ " generator = torch.Generator(device='cuda').manual_seed(args.seed)\n",
352
+ " args.output_dir = os.path.join(args.output_dir, f\"{args.dataset_name}-{args.height}\", \"paired\" if args.eval_pair else \"unpaired\")\n",
353
+ " if not os.path.exists(args.output_dir):\n",
354
+ " os.makedirs(args.output_dir)\n",
355
+ " \n",
356
+ " for batch in tqdm(dataloader):\n",
357
+ " person_images = batch['person']\n",
358
+ " cloth_images = batch['cloth']\n",
359
+ "\n",
360
+ " results = pipeline(\n",
361
+ " person_images,\n",
362
+ " cloth_images,\n",
363
+ " num_inference_steps=args.num_inference_steps,\n",
364
+ " guidance_scale=args.guidance_scale,\n",
365
+ " height=args.height,\n",
366
+ " width=args.width,\n",
367
+ " generator=generator,\n",
368
+ " )\n",
369
+ " \n",
370
+ " if args.concat_eval_results:\n",
371
+ " person_images = to_pil_image(person_images)\n",
372
+ " cloth_images = to_pil_image(cloth_images)\n",
373
+ " for i, result in enumerate(results):\n",
374
+ " person_name = batch['person_name'][i]\n",
375
+ " output_path = os.path.join(args.output_dir, person_name)\n",
376
+ " if not os.path.exists(os.path.dirname(output_path)):\n",
377
+ " os.makedirs(os.path.dirname(output_path))\n",
378
+ " if args.concat_eval_results:\n",
379
+ " w, h = result.size\n",
380
+ " concated_result = Image.new('RGB', (w*3, h))\n",
381
+ " concated_result.paste(person_images[i], (0, 0))\n",
382
+ " concated_result.paste(cloth_images[i], (w, 0)) \n",
383
+ " concated_result.paste(result, (w*2, 0))\n",
384
+ " result = concated_result\n",
385
+ " result.save(output_path)\n",
386
+ "\n",
387
+ "if __name__ == \"__main__\":\n",
388
+ " main()"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": null,
394
+ "id": "39537851",
395
+ "metadata": {},
396
+ "outputs": [],
397
+ "source": []
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": null,
402
+ "id": "22fb6113",
403
+ "metadata": {},
404
+ "outputs": [],
405
+ "source": []
406
+ },
407
+ {
408
+ "cell_type": "code",
409
+ "execution_count": null,
410
+ "id": "1c374cc6",
411
+ "metadata": {},
412
+ "outputs": [],
413
+ "source": []
414
+ },
415
+ {
416
+ "cell_type": "code",
417
+ "execution_count": null,
418
+ "id": "bddce5df",
419
+ "metadata": {
420
+ "vscode": {
421
+ "languageId": "markdown"
422
+ }
423
+ },
424
+ "outputs": [],
425
+ "source": []
426
+ }
427
+ ],
428
+ "metadata": {
429
+ "kernelspec": {
430
+ "display_name": "harsh",
431
+ "language": "python",
432
+ "name": "python3"
433
+ },
434
+ "language_info": {
435
+ "codemirror_mode": {
436
+ "name": "ipython",
437
+ "version": 3
438
+ },
439
+ "file_extension": ".py",
440
+ "mimetype": "text/x-python",
441
+ "name": "python",
442
+ "nbconvert_exporter": "python",
443
+ "pygments_lexer": "ipython3",
444
+ "version": "3.10.18"
445
+ }
446
+ },
447
+ "nbformat": 4,
448
+ "nbformat_minor": 5
449
+ }
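Both notebooks (and app.py) share the same classifier-free guidance step shown above: the batch stacks an unconditional branch, in which the garment latent is zeroed, on top of the conditional branch, and the final noise prediction extrapolates from the unconditional output toward the conditional one. A tiny standalone sketch of that combine with dummy tensors:

import torch

# Sketch of the guidance combine in __call__; the UNet output stacks
# [unconditional, conditional] along the batch dimension.
guidance_scale = 2.5
unet_out = torch.randn(2, 4, 64, 96)  # dummy prediction for illustration
noise_pred_uncond, noise_pred_cond = unet_out.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond)
print(noise_pred.shape)  # torch.Size([1, 4, 64, 96])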
output/vitonhd-512/unpaired/00654_00.jpg DELETED
Binary file (67.8 kB)
 
output/vitonhd-512/unpaired/01265_00.jpg DELETED
Binary file (35.2 kB)
 
output/vitonhd-512/unpaired/01985_00.jpg DELETED
Binary file (42.8 kB)
 
output/vitonhd-512/unpaired/02023_00.jpg DELETED
Binary file (40.4 kB)