diff --git a/.gitignore b/.gitignore
index 7d5c8833df2de7411771e3b3588bfe7180cfd019..8b73bce0495bebe68e851e5551bdf5a7c05b87dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,9 @@
*inkpunk-diffusion-v1.ckpt
+*instruct-pix2pix*
*sd-v1-5-inpainting.ckpt
*zalando-hd-resized.zip
-
+*finetuned_weights.safetensors
+*maskfree_finetuned_weights.safetensors
# *viton-hd-dataset.zip
viton-hd-dataset/
checkpoints/
diff --git a/.gradio/certificate.pem b/.gradio/certificate.pem
new file mode 100644
index 0000000000000000000000000000000000000000..b85c8037f6b60976b2546fdbae88312c5246d9a3
--- /dev/null
+++ b/.gradio/certificate.pem
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..44475ab8ce3d4510a8f1e3ace822b08f4cf3b202
--- /dev/null
+++ b/app.py
@@ -0,0 +1,430 @@
+import os
+import torch
+import gradio as gr
+from PIL import Image
+import numpy as np
+from typing import Optional
+
+# Import your custom modules
+from load_model import preload_models_from_standard_weights
+from utils import to_pil_image
+
+from typing import Union
+
+import PIL
+import tqdm
+from diffusers.utils.torch_utils import randn_tensor
+
+from utils import (check_inputs_maskfree, get_time_embedding, numpy_to_pil, prepare_image, compute_vae_encodings)
+from ddpm import DDPMSampler
+
+
+class CatVTONPix2PixPipeline:
+ def __init__(
+ self,
+ weight_dtype=torch.float32,
+ device='cuda',
+ compile=False,
+ skip_safety_check=True,
+ use_tf32=True,
+ models={},
+ ):
+ self.device = device
+ self.weight_dtype = weight_dtype
+ self.skip_safety_check = skip_safety_check
+ self.models = models
+
+ self.generator = torch.Generator(device=device)
+ self.noise_scheduler = DDPMSampler(generator=self.generator)
+        self.encoder = models.get('encoder', None)
+        self.decoder = models.get('decoder', None)
+        self.unet = models.get('diffusion', None)
+
+        # Enable TF32 for faster matmuls on Ampere (and newer) GPUs
+ if use_tf32:
+ torch.set_float32_matmul_precision("high")
+ torch.backends.cuda.matmul.allow_tf32 = True
+
+ @torch.no_grad()
+ def __call__(
+ self,
+ image: Union[PIL.Image.Image, torch.Tensor],
+ condition_image: Union[PIL.Image.Image, torch.Tensor],
+ num_inference_steps: int = 50,
+ guidance_scale: float = 2.5,
+ height: int = 1024,
+ width: int = 768,
+ generator=None,
+ eta=1.0,
+ **kwargs
+ ):
+ concat_dim = -1 # FIXME: y axis concat
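+        # The person latent and the garment latent are joined along this last spatial axis,
+        # so the denoiser sees both halves of one wide latent (CatVTON-style concatenation).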
+ # Prepare inputs to Tensor
+ image, condition_image = check_inputs_maskfree(image, condition_image, width, height)
+
+ # Ensure consistent dtype for all tensors
+ image = prepare_image(image).to(self.device, dtype=self.weight_dtype)
+ condition_image = prepare_image(condition_image).to(self.device, dtype=self.weight_dtype)
+
+ # Encode the image
+ image_latent = compute_vae_encodings(image, self.encoder)
+ condition_latent = compute_vae_encodings(condition_image, self.encoder)
+
+ del image, condition_image
+
+ # Concatenate latents
+ condition_latent_concat = torch.cat([image_latent, condition_latent], dim=concat_dim)
+
+ # Prepare noise
+ latents = randn_tensor(
+ condition_latent_concat.shape,
+ generator=generator,
+ device=condition_latent_concat.device,
+ dtype=self.weight_dtype,
+ )
+
+ # Prepare timesteps
+ self.noise_scheduler.set_inference_timesteps(num_inference_steps)
+ timesteps = self.noise_scheduler.timesteps
+ latents = self.noise_scheduler.add_noise(latents, timesteps[0])
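+        # latents start as Gaussian noise; noising them at the first (largest) timestep matches
+        # the scheduler's expected starting distribution.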
+
+ # Classifier-Free Guidance
+ if do_classifier_free_guidance := (guidance_scale > 1.0):
+ condition_latent_concat = torch.cat(
+ [
+ torch.cat([image_latent, torch.zeros_like(condition_latent)], dim=concat_dim),
+ condition_latent_concat,
+ ]
+ )
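+            # The unconditional half of the batch zeroes out the garment latent, so guidance
+            # amplifies the difference between garment-conditioned and unconditional predictions.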
+
+ num_warmup_steps = 0 # For simple DDPM, no warmup needed
+ with tqdm.tqdm(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = (torch.cat([latents] * 2) if do_classifier_free_guidance else latents)
+
+ # prepare the input for the inpainting model
+ p2p_latent_model_input = torch.cat([latent_model_input, condition_latent_concat], dim=1)
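+                # Channel-wise concat: 4 noisy-latent channels + 4 condition channels = 8,
+                # matching the UNet in_channels configured in load_model.py for the pix2pix weights.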
+
+ # predict the noise residual
+ timestep = t.repeat(p2p_latent_model_input.shape[0])
+ time_embedding = get_time_embedding(timestep).to(self.device, dtype=self.weight_dtype)
+
+ noise_pred = self.unet(
+ p2p_latent_model_input,
+ time_embedding
+ )
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (
+ noise_pred_text - noise_pred_uncond
+ )
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.noise_scheduler.step(
+ t, latents, noise_pred
+ )
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps):
+ progress_bar.update()
+
+ # Decode the final latents
+ latents = latents.split(latents.shape[concat_dim] // 2, dim=concat_dim)[0]
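+        # Keep only the person half of the wide latent; the garment half is discarded before decoding.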
+ image = self.decoder(latents.to(self.device, dtype=self.weight_dtype))
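+        # The decoder outputs values in [-1, 1]; rescale to [0, 1] before converting to PIL.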
+ image = (image / 2 + 0.5).clamp(0, 1)
+ image = image.cpu().permute(0, 2, 3, 1).float().numpy()
+ image = numpy_to_pil(image)
+
+ return image
+
+def load_models():
+ try:
+ print("🚀 Starting model loading process...")
+
+ # Check CUDA availability
+ cuda_available = torch.cuda.is_available()
+ print(f"CUDA available: {cuda_available}")
+ if cuda_available:
+ print(f"CUDA device: {torch.cuda.get_device_name()}")
+ free_memory = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)
+ print(f"Available CUDA memory: {free_memory / 1e9:.2f} GB")
+
+ device = "cuda" if cuda_available else "cpu"
+
+ # Check if model files exist
+ ckpt_path = "instruct-pix2pix-00-22000.ckpt"
+ finetune_path = "maskfree_finetuned_weights.safetensors"
+
+ if not os.path.exists(ckpt_path):
+ print(f"❌ Checkpoint file not found: {ckpt_path}")
+ return None, None
+
+ if not os.path.exists(finetune_path):
+ print(f"❌ Finetune weights file not found: {finetune_path}")
+ return None, None
+
+ print("📦 Loading models from weights...")
+
+ models = preload_models_from_standard_weights(
+ ckpt_path=ckpt_path,
+ device=device,
+ finetune_weights_path=finetune_path
+ )
+
+ if not models:
+ print("❌ Failed to load models")
+ return None, None
+
+ # Convert all models to consistent dtype to avoid mixed precision issues
+ weight_dtype = torch.float32 # Use float32 to avoid dtype mismatch
+ print(f"Converting models to {weight_dtype}...")
+
+ # Ensure all models use the same dtype
+ for model_name, model in models.items():
+ if model is not None:
+ try:
+ model = model.to(dtype=weight_dtype)
+ models[model_name] = model
+ print(f"✅ {model_name} converted to {weight_dtype}")
+ except Exception as e:
+ print(f"⚠️ Could not convert {model_name} to {weight_dtype}: {e}")
+
+ print("🔧 Initializing pipeline...")
+
+ pipeline = CatVTONPix2PixPipeline(
+ weight_dtype=weight_dtype,
+ device=device,
+ skip_safety_check=True,
+ models=models,
+ )
+
+ print("✅ Models and pipeline loaded successfully!")
+ return models, pipeline
+
+ except Exception as e:
+ print(f"❌ Error in load_models: {e}")
+ import traceback
+ traceback.print_exc()
+ return None, None
+
+def person_example_fn(image_path):
+ """Handle person image examples"""
+ if image_path:
+ return image_path
+ return None
+
+def create_demo(pipeline=None):
+ """Create the Gradio interface"""
+
+ def submit_function_p2p(
+ person_image_path: Optional[str],
+ cloth_image_path: Optional[str],
+ num_inference_steps: int = 50,
+ guidance_scale: float = 2.5,
+ seed: int = 42,
+ ) -> Optional[Image.Image]:
+ """Process virtual try-on inference"""
+
+ try:
+ if not person_image_path or not cloth_image_path:
+ gr.Warning("Please upload both person and cloth images!")
+ return None
+
+            if not os.path.exists(person_image_path):
+                gr.Warning("Person image file not found!")
+                return None
+
+            if not os.path.exists(cloth_image_path):
+                gr.Warning("Cloth image file not found!")
+                return None
+
+            if pipeline is None:
+                gr.Warning("Models not loaded! Please restart the application.")
+                return None
+
+ # Load images
+ try:
+ person_image = Image.open(person_image_path).convert('RGB')
+ cloth_image = Image.open(cloth_image_path).convert('RGB')
+ except Exception as e:
+                gr.Warning(f"Error loading images: {str(e)}")
+ return None
+
+ # Set up generator
+ generator = torch.Generator(device=pipeline.device)
+ if seed != -1:
+ generator.manual_seed(seed)
+
+ print("🔄 Processing virtual try-on...")
+
+ # Run inference
+ with torch.no_grad():
+ results = pipeline(
+ person_image,
+ cloth_image,
+ num_inference_steps=num_inference_steps,
+ guidance_scale=guidance_scale,
+ height=512,
+ width=384,
+ generator=generator,
+ )
+
+ # Process results
+ if isinstance(results, list) and len(results) > 0:
+ result = results[0]
+ else:
+ result = results
+
+ return result
+
+ except Exception as e:
+ print(f"❌ Error in submit_function_p2p: {e}")
+ import traceback
+ traceback.print_exc()
+            gr.Warning(f"Error during inference: {str(e)}")
+ return None
+
+ # Custom CSS for better styling
+ css = """
+ .gradio-container {
+ max-width: 1200px !important;
+ }
+ .image-container {
+ max-height: 600px;
+ }
+ """
+
+ with gr.Blocks(css=css, title="Virtual Try-On") as demo:
+ gr.HTML("""
+        <div style="text-align: center;">
+            <h1>🧥 Virtual Try-On with CatVTON</h1>
+            <p>
+                Upload a person image and a clothing item to see how they look together!
+            </p>
+        </div>
+        """)
+
+ with gr.Tab("Mask-Free Virtual Try-On"):
+ with gr.Row():
+ with gr.Column(scale=1, min_width=350):
+ with gr.Row():
+ image_path_p2p = gr.Image(
+ type="filepath",
+ interactive=True,
+ visible=False,
+ )
+ person_image_p2p = gr.Image(
+ interactive=True,
+ label="Person Image",
+ type="filepath",
+ elem_classes=["image-container"]
+ )
+
+ with gr.Row():
+ cloth_image_p2p = gr.Image(
+ interactive=True,
+ label="Clothing Image",
+ type="filepath",
+ elem_classes=["image-container"]
+ )
+
+ submit_p2p = gr.Button("✨ Generate Try-On", variant="primary", size="lg")
+
+ gr.Markdown(
+ '⚠️ Click only once and wait for processing!'
+ )
+
+ with gr.Accordion("🔧 Advanced Options", open=False):
+ num_inference_steps_p2p = gr.Slider(
+ label="Inference Steps",
+ minimum=10,
+ maximum=100,
+ step=5,
+ value=50,
+ info="More steps = better quality but slower"
+ )
+ guidance_scale_p2p = gr.Slider(
+ label="Guidance Scale",
+ minimum=0.0,
+ maximum=7.5,
+ step=0.5,
+ value=2.5,
+ info="Higher values = stronger conditioning"
+ )
+ seed_p2p = gr.Slider(
+ label="Seed",
+ minimum=-1,
+ maximum=10000,
+ step=1,
+ value=42,
+ info="Use -1 for random seed"
+ )
+
+ with gr.Column(scale=2, min_width=500):
+ result_image_p2p = gr.Image(
+ interactive=False,
+                        label="Generated Try-On Result",
+ elem_classes=["image-container"]
+ )
+
+ gr.Markdown("""
+ ### 📋 Instructions:
+ 1. Upload a **person image** (front-facing works best)
+ 2. Upload a **clothing item** you want to try on
+ 3. Adjust advanced settings if needed
+ 4. Click "Generate Try-On" and wait
+
+ ### 💡 Tips:
+ - Use clear, high-resolution images
+ - Person should be facing forward
+ - Clothing items work best when laid flat or on a model
+ - Try different seeds if you're not satisfied with results
+ """)
+
+ # Event handlers
+ image_path_p2p.change(
+ person_example_fn,
+ inputs=image_path_p2p,
+ outputs=person_image_p2p
+ )
+
+ submit_p2p.click(
+ submit_function_p2p,
+ inputs=[
+ person_image_p2p,
+ cloth_image_p2p,
+ num_inference_steps_p2p,
+ guidance_scale_p2p,
+ seed_p2p,
+ ],
+ outputs=result_image_p2p,
+ )
+
+ return demo
+
+def app_gradio():
+ """Main application function"""
+
+ # Load models at startup
+ print("🚀 Loading models...")
+ models, pipeline = load_models()
+ if not models or not pipeline:
+ print("❌ Failed to load models. Please check your model files.")
+ return
+
+ # Create and launch demo
+ demo = create_demo(pipeline=pipeline)
+ demo.launch(
+ share=True,
+ show_error=True,
+ server_name="0.0.0.0",
+ server_port=7860
+ )
+
+if __name__ == "__main__":
+ app_gradio()
\ No newline at end of file
diff --git a/load_model.py b/load_model.py
index a78123c3afff8e45e8cbb34643c85bf837a09788..e49bf97e2f66c968eec2ad12aad737cb7f69cd8f 100644
--- a/load_model.py
+++ b/load_model.py
@@ -78,7 +78,12 @@ def load_finetuned_attention_weights(finetune_weights_path, diffusion, device):
def preload_models_from_standard_weights(ckpt_path, device, finetune_weights_path=None):
# CatVTON parameters
+    # in_channels: 8 for instruct-pix2pix (mask-free), 9 for sd-v1-5-inpainting (mask-based)
in_channels = 9
+
+    if finetune_weights_path and ('maskfree' in finetune_weights_path or 'mask_free' in finetune_weights_path):
+ in_channels = 8
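+    # (8 = 4 noisy-latent + 4 condition-latent channels; 9 = 4 noisy-latent + 1 mask
+    #  + 4 masked-image latent channels, as in the SD 1.5 inpainting UNet)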
+
out_channels = 4
state_dict=model_converter.load_from_standard_weights(ckpt_path, device)
diff --git a/mask-based-output/vitonhd-512/unpaired/00654_00.jpg b/mask-based-output/vitonhd-512/unpaired/00654_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..426a7db299da61f2b3ef5f63ededfefb4d7d11d5
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/00654_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/01265_00.jpg b/mask-based-output/vitonhd-512/unpaired/01265_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bb9fd57dc6a06337999739f38422e062eea07f40
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/01265_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/01985_00.jpg b/mask-based-output/vitonhd-512/unpaired/01985_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..203e13a647c5b24539d1204a3a23a84c01d2e7cb
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/01985_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/02023_00.jpg b/mask-based-output/vitonhd-512/unpaired/02023_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2c2e344617f59b0b4cdaa75792ad8bf94c2e09b8
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/02023_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/02532_00.jpg b/mask-based-output/vitonhd-512/unpaired/02532_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3d9ab08223986e133915cab6bfb57f042d2355fe
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/02532_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/02944_00.jpg b/mask-based-output/vitonhd-512/unpaired/02944_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b876388b36613dc98ab504dfddbe188c212a9435
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/02944_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/03191_00.jpg b/mask-based-output/vitonhd-512/unpaired/03191_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..33ff2fa436b793b1da60b85a9c6f0b7f811ecd65
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/03191_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/03921_00.jpg b/mask-based-output/vitonhd-512/unpaired/03921_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9a3c3dd63d7fdc642a01491fdae77e3dc799e0c8
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/03921_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/05006_00.jpg b/mask-based-output/vitonhd-512/unpaired/05006_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8d0ede99acdf085dfc0f40c0d230bd8ff8e2bd5e
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/05006_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/05378_00.jpg b/mask-based-output/vitonhd-512/unpaired/05378_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..02152c7544054dc8a9835352af01d6694db9e154
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/05378_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/07342_00.jpg b/mask-based-output/vitonhd-512/unpaired/07342_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1dcb08eb5102e82f00371556d617aa8e2a6c5c3c
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/07342_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/08088_00.jpg b/mask-based-output/vitonhd-512/unpaired/08088_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9123c591eed312567598f03a3ff7812ab24832eb
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/08088_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/08239_00.jpg b/mask-based-output/vitonhd-512/unpaired/08239_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..94dee6120137d9ad8ce7934aedcecc0edfa5ec1d
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/08239_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/08650_00.jpg b/mask-based-output/vitonhd-512/unpaired/08650_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b7dc2c9653cf81c54c593ef7ca60685ab91fbe7
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/08650_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/08839_00.jpg b/mask-based-output/vitonhd-512/unpaired/08839_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..877f8b46e6a0b13e1eddc2e291346493e13712fa
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/08839_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/11085_00.jpg b/mask-based-output/vitonhd-512/unpaired/11085_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..094449ed1949592950abacd98f28894c11097e35
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/11085_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/12345_00.jpg b/mask-based-output/vitonhd-512/unpaired/12345_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..acf474fdfc13b130e59efdf4bfc6db59a24c7fd0
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/12345_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/12419_00.jpg b/mask-based-output/vitonhd-512/unpaired/12419_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0d72b620505d34f870d389756819b639d25d3d4e
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/12419_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/12562_00.jpg b/mask-based-output/vitonhd-512/unpaired/12562_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a6b05a57420b6935c89cca0ba4383ee67f62893a
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/12562_00.jpg differ
diff --git a/mask-based-output/vitonhd-512/unpaired/14651_00.jpg b/mask-based-output/vitonhd-512/unpaired/14651_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ac8736d4a918a60de597152c50c64c2e48bfb90a
Binary files /dev/null and b/mask-based-output/vitonhd-512/unpaired/14651_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/00654_00.jpg b/mask-free-output/vitonhd-512/unpaired/00654_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a0257c01dcb4c1b2c43f4304fb1bf83cd171209e
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/00654_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/01265_00.jpg b/mask-free-output/vitonhd-512/unpaired/01265_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6925069276f205361044cebe1bd2e8902478ee51
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/01265_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/01985_00.jpg b/mask-free-output/vitonhd-512/unpaired/01985_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..048f1cf92344730db8f4f49f60c99cf92de707c9
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/01985_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/02023_00.jpg b/mask-free-output/vitonhd-512/unpaired/02023_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4540be4c51feb07d15436e3786a7025d6bbe2fea
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/02023_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/02532_00.jpg b/mask-free-output/vitonhd-512/unpaired/02532_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ed1c6e7de0df52ce017ef466450c3966eb97814c
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/02532_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/02944_00.jpg b/mask-free-output/vitonhd-512/unpaired/02944_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0d9dc65abdc38eb363483fef47610939c280a817
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/02944_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/03191_00.jpg b/mask-free-output/vitonhd-512/unpaired/03191_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6a82ce367105012bf0234b14b96dfd7c7a9a45e0
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/03191_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/03921_00.jpg b/mask-free-output/vitonhd-512/unpaired/03921_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9ac62a3e26cc9951aeec90f831814ac6e2ad0caf
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/03921_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/05006_00.jpg b/mask-free-output/vitonhd-512/unpaired/05006_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4c71c960b70e1e1e87115fcd8ef848cf22875f80
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/05006_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/05378_00.jpg b/mask-free-output/vitonhd-512/unpaired/05378_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b15f1a8b2a2d6ded57ada390df7e850f31e2d46d
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/05378_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/07342_00.jpg b/mask-free-output/vitonhd-512/unpaired/07342_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e3e40ed8260ac7c922ebc85380b4b1092b965753
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/07342_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/08088_00.jpg b/mask-free-output/vitonhd-512/unpaired/08088_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cb49ffaa155c938ce5000e6d7433aef3d0096cfc
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/08088_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/08239_00.jpg b/mask-free-output/vitonhd-512/unpaired/08239_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c38883abe653dc52c7d93fe6ec0e53db2eb790c5
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/08239_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/08650_00.jpg b/mask-free-output/vitonhd-512/unpaired/08650_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..095f733d4ac8bad22c6deb984f8f2223d397d4d0
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/08650_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/08839_00.jpg b/mask-free-output/vitonhd-512/unpaired/08839_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a7760ea59219e19f3eb36374a88f4cf06ceaf85b
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/08839_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/11085_00.jpg b/mask-free-output/vitonhd-512/unpaired/11085_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4a6bab62aa0728cfb95262949678abee14686fee
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/11085_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/12345_00.jpg b/mask-free-output/vitonhd-512/unpaired/12345_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d7ec5289c266b5fbb4f9353a25ba1a8084801bac
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/12345_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/12419_00.jpg b/mask-free-output/vitonhd-512/unpaired/12419_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cdbe728f7e377c160829578fdde31b4627f2250d
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/12419_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/12562_00.jpg b/mask-free-output/vitonhd-512/unpaired/12562_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..62533abdc0f4a83a91a9ecf50ac7e1fc9f0e008d
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/12562_00.jpg differ
diff --git a/mask-free-output/vitonhd-512/unpaired/14651_00.jpg b/mask-free-output/vitonhd-512/unpaired/14651_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..24c49def22c6c399e4594debf56310936bebfad0
Binary files /dev/null and b/mask-free-output/vitonhd-512/unpaired/14651_00.jpg differ
diff --git a/sample_inference.ipynb b/mask_based_inference.ipynb
similarity index 90%
rename from sample_inference.ipynb
rename to mask_based_inference.ipynb
index 0f6d011cfacc0e4e6a29eb3c817b1977b0d8fd3f..cbdca6f4a4499a7901e6d4b257cc20e8ecbd255c 100644
--- a/sample_inference.ipynb
+++ b/mask_based_inference.ipynb
@@ -28,6 +28,76 @@
{
"cell_type": "code",
"execution_count": 2,
+ "id": "24bd99d5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loaded finetuned weights from finetuned_weights.safetensors\n",
+ "Loading 0.in_proj.weight\n",
+ "Loading 0.out_proj.weight\n",
+ "Loading 0.out_proj.bias\n",
+ "Loading 8.in_proj.weight\n",
+ "Loading 8.out_proj.weight\n",
+ "Loading 8.out_proj.bias\n",
+ "Loading 16.in_proj.weight\n",
+ "Loading 16.out_proj.weight\n",
+ "Loading 16.out_proj.bias\n",
+ "Loading 24.in_proj.weight\n",
+ "Loading 24.out_proj.weight\n",
+ "Loading 24.out_proj.bias\n",
+ "Loading 32.in_proj.weight\n",
+ "Loading 32.out_proj.weight\n",
+ "Loading 32.out_proj.bias\n",
+ "Loading 40.in_proj.weight\n",
+ "Loading 40.out_proj.weight\n",
+ "Loading 40.out_proj.bias\n",
+ "Loading 48.in_proj.weight\n",
+ "Loading 48.out_proj.weight\n",
+ "Loading 48.out_proj.bias\n",
+ "Loading 56.in_proj.weight\n",
+ "Loading 56.out_proj.weight\n",
+ "Loading 56.out_proj.bias\n",
+ "Loading 64.in_proj.weight\n",
+ "Loading 64.out_proj.weight\n",
+ "Loading 64.out_proj.bias\n",
+ "Loading 72.in_proj.weight\n",
+ "Loading 72.out_proj.weight\n",
+ "Loading 72.out_proj.bias\n",
+ "Loading 80.in_proj.weight\n",
+ "Loading 80.out_proj.weight\n",
+ "Loading 80.out_proj.bias\n",
+ "Loading 88.in_proj.weight\n",
+ "Loading 88.out_proj.weight\n",
+ "Loading 88.out_proj.bias\n",
+ "Loading 96.in_proj.weight\n",
+ "Loading 96.out_proj.weight\n",
+ "Loading 96.out_proj.bias\n",
+ "Loading 104.in_proj.weight\n",
+ "Loading 104.out_proj.weight\n",
+ "Loading 104.out_proj.bias\n",
+ "Loading 112.in_proj.weight\n",
+ "Loading 112.out_proj.weight\n",
+ "Loading 112.out_proj.bias\n",
+ "Loading 120.in_proj.weight\n",
+ "Loading 120.out_proj.weight\n",
+ "Loading 120.out_proj.bias\n",
+ "\n",
+ "Attention module weights loaded from {finetune_weights_path} successfully.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import load_model\n",
+ "\n",
+ "models=load_model.preload_models_from_standard_weights(ckpt_path=\"sd-v1-5-inpainting.ckpt\", device=\"cuda\", finetune_weights_path=\"finetuned_weights.safetensors\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
"id": "bab24c29",
"metadata": {},
"outputs": [
@@ -183,77 +253,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "a069151e",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loaded finetuned weights from finetuned_weights.safetensors\n",
- "Loading 0.in_proj.weight\n",
- "Loading 0.out_proj.weight\n",
- "Loading 0.out_proj.bias\n",
- "Loading 8.in_proj.weight\n",
- "Loading 8.out_proj.weight\n",
- "Loading 8.out_proj.bias\n",
- "Loading 16.in_proj.weight\n",
- "Loading 16.out_proj.weight\n",
- "Loading 16.out_proj.bias\n",
- "Loading 24.in_proj.weight\n",
- "Loading 24.out_proj.weight\n",
- "Loading 24.out_proj.bias\n",
- "Loading 32.in_proj.weight\n",
- "Loading 32.out_proj.weight\n",
- "Loading 32.out_proj.bias\n",
- "Loading 40.in_proj.weight\n",
- "Loading 40.out_proj.weight\n",
- "Loading 40.out_proj.bias\n",
- "Loading 48.in_proj.weight\n",
- "Loading 48.out_proj.weight\n",
- "Loading 48.out_proj.bias\n",
- "Loading 56.in_proj.weight\n",
- "Loading 56.out_proj.weight\n",
- "Loading 56.out_proj.bias\n",
- "Loading 64.in_proj.weight\n",
- "Loading 64.out_proj.weight\n",
- "Loading 64.out_proj.bias\n",
- "Loading 72.in_proj.weight\n",
- "Loading 72.out_proj.weight\n",
- "Loading 72.out_proj.bias\n",
- "Loading 80.in_proj.weight\n",
- "Loading 80.out_proj.weight\n",
- "Loading 80.out_proj.bias\n",
- "Loading 88.in_proj.weight\n",
- "Loading 88.out_proj.weight\n",
- "Loading 88.out_proj.bias\n",
- "Loading 96.in_proj.weight\n",
- "Loading 96.out_proj.weight\n",
- "Loading 96.out_proj.bias\n",
- "Loading 104.in_proj.weight\n",
- "Loading 104.out_proj.weight\n",
- "Loading 104.out_proj.bias\n",
- "Loading 112.in_proj.weight\n",
- "Loading 112.out_proj.weight\n",
- "Loading 112.out_proj.bias\n",
- "Loading 120.in_proj.weight\n",
- "Loading 120.out_proj.weight\n",
- "Loading 120.out_proj.bias\n",
- "\n",
- "Attention module weights loaded from {finetune_weights_path} successfully.\n"
- ]
- }
- ],
- "source": [
- "import load_model\n",
- "\n",
- "models=load_model.preload_models_from_standard_weights(ckpt_path=\"sd-v1-5-inpainting.ckpt\", device=\"cuda\", finetune_weights_path=\"finetuned_weights.safetensors\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"id": "a729bf46",
"metadata": {},
"outputs": [
@@ -268,15 +268,27 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "100%|██████████| 50/50 [00:11<00:00, 4.48it/s]\n",
- "100%|██████████| 50/50 [00:10<00:00, 4.55it/s]\n",
- "100%|██████████| 50/50 [00:11<00:00, 4.43it/s]\n",
- "100%|██████████| 50/50 [00:11<00:00, 4.53it/s]\n",
- "100%|██████████| 50/50 [00:11<00:00, 4.53it/s]\n",
- "100%|██████████| 50/50 [00:11<00:00, 4.51it/s]\n",
- "100%|██████████| 50/50 [00:10<00:00, 4.57it/s]\n",
- "100%|██████████| 50/50 [00:11<00:00, 4.51it/s]\n",
- " 40%|████ | 8/20 [01:32<02:17, 11.49s/it]"
+ "100%|██████████| 50/50 [00:07<00:00, 7.04it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.32it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.01it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.82it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.86it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.25it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.24it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.89it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.90it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.02it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.40it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.15it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.79it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.07it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.14it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.32it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.13it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.05it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.06it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.09it/s]\n",
+ "100%|██████████| 20/20 [02:28<00:00, 7.40s/it]\n"
]
}
],
@@ -299,7 +311,7 @@
" args.__dict__= {\n",
" \"dataset_name\": \"vitonhd\",\n",
" \"data_root_path\": \"./sample_dataset\",\n",
- " \"output_dir\": \"./output\",\n",
+ " \"output_dir\": \"./mask-based-output\",\n",
" \"seed\": 555,\n",
" \"batch_size\": 1,\n",
" \"num_inference_steps\": 50,\n",
diff --git a/mask_free_inference.ipynb b/mask_free_inference.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..65ca8ee668bfbe980f16f97214aad22b48359e37
--- /dev/null
+++ b/mask_free_inference.ipynb
@@ -0,0 +1,449 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6d50f66c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model already downloaded.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# check if the model is downloaded, if not download it\n",
+ "import os\n",
+ "if not os.path.exists(\"instruct-pix2pix-00-22000.ckpt\"):\n",
+ " !wget https://huggingface.co/timbrooks/instruct-pix2pix/resolve/main/instruct-pix2pix-00-22000.ckpt\n",
+ "else:\n",
+ " print(\"Model already downloaded.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "3598a305",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loaded finetuned weights from maskfree_finetuned_weights.safetensors\n",
+ "Loading 0.in_proj.weight\n",
+ "Loading 0.out_proj.weight\n",
+ "Loading 0.out_proj.bias\n",
+ "Loading 8.in_proj.weight\n",
+ "Loading 8.out_proj.weight\n",
+ "Loading 8.out_proj.bias\n",
+ "Loading 16.in_proj.weight\n",
+ "Loading 16.out_proj.weight\n",
+ "Loading 16.out_proj.bias\n",
+ "Loading 24.in_proj.weight\n",
+ "Loading 24.out_proj.weight\n",
+ "Loading 24.out_proj.bias\n",
+ "Loading 32.in_proj.weight\n",
+ "Loading 32.out_proj.weight\n",
+ "Loading 32.out_proj.bias\n",
+ "Loading 40.in_proj.weight\n",
+ "Loading 40.out_proj.weight\n",
+ "Loading 40.out_proj.bias\n",
+ "Loading 48.in_proj.weight\n",
+ "Loading 48.out_proj.weight\n",
+ "Loading 48.out_proj.bias\n",
+ "Loading 56.in_proj.weight\n",
+ "Loading 56.out_proj.weight\n",
+ "Loading 56.out_proj.bias\n",
+ "Loading 64.in_proj.weight\n",
+ "Loading 64.out_proj.weight\n",
+ "Loading 64.out_proj.bias\n",
+ "Loading 72.in_proj.weight\n",
+ "Loading 72.out_proj.weight\n",
+ "Loading 72.out_proj.bias\n",
+ "Loading 80.in_proj.weight\n",
+ "Loading 80.out_proj.weight\n",
+ "Loading 80.out_proj.bias\n",
+ "Loading 88.in_proj.weight\n",
+ "Loading 88.out_proj.weight\n",
+ "Loading 88.out_proj.bias\n",
+ "Loading 96.in_proj.weight\n",
+ "Loading 96.out_proj.weight\n",
+ "Loading 96.out_proj.bias\n",
+ "Loading 104.in_proj.weight\n",
+ "Loading 104.out_proj.weight\n",
+ "Loading 104.out_proj.bias\n",
+ "Loading 112.in_proj.weight\n",
+ "Loading 112.out_proj.weight\n",
+ "Loading 112.out_proj.bias\n",
+ "Loading 120.in_proj.weight\n",
+ "Loading 120.out_proj.weight\n",
+ "Loading 120.out_proj.bias\n",
+ "\n",
+ "Attention module weights loaded from {finetune_weights_path} successfully.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import load_model\n",
+ "\n",
+ "models=load_model.preload_models_from_standard_weights(ckpt_path=\"instruct-pix2pix-00-22000.ckpt\", device=\"cuda\", finetune_weights_path=\"maskfree_finetuned_weights.safetensors\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "78e3d8b9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mahesh/miniconda3/envs/harsh/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
+ "source": [
+ "import inspect\n",
+ "import os\n",
+ "from typing import Union\n",
+ "\n",
+ "import PIL\n",
+ "import numpy as np\n",
+ "import torch\n",
+ "import tqdm\n",
+ "from diffusers.utils.torch_utils import randn_tensor\n",
+ "\n",
+ "from utils import (check_inputs_maskfree, get_time_embedding, numpy_to_pil, prepare_image, compute_vae_encodings)\n",
+ "from ddpm import DDPMSampler\n",
+ "\n",
+ "class CatVTONPix2PixPipeline:\n",
+ " def __init__(\n",
+ " self, \n",
+ " weight_dtype=torch.float32,\n",
+ " device='cuda',\n",
+ " compile=False,\n",
+ " skip_safety_check=True,\n",
+ " use_tf32=True,\n",
+ " models={},\n",
+ " ):\n",
+ " self.device = device\n",
+ " self.weight_dtype = weight_dtype\n",
+ " self.skip_safety_check = skip_safety_check\n",
+ " self.models = models\n",
+ "\n",
+ " self.generator = torch.Generator(device=device)\n",
+ " self.noise_scheduler = DDPMSampler(generator=self.generator)\n",
+ " # self.vae = AutoencoderKL.from_pretrained(\"stabilityai/sd-vae-ft-mse\").to(device, dtype=weight_dtype)\n",
+ " self.encoder= models.get('encoder', None)\n",
+ " self.decoder= models.get('decoder', None)\n",
+ " \n",
+ " self.unet=models.get('diffusion', None) \n",
+ " # # Enable TF32 for faster training on Ampere GPUs (A100 and RTX 30 series).\n",
+ " if use_tf32:\n",
+ " torch.set_float32_matmul_precision(\"high\")\n",
+ " torch.backends.cuda.matmul.allow_tf32 = True\n",
+ "\n",
+ " @torch.no_grad()\n",
+ " def __call__(\n",
+ " self, \n",
+ " image: Union[PIL.Image.Image, torch.Tensor],\n",
+ " condition_image: Union[PIL.Image.Image, torch.Tensor],\n",
+ " num_inference_steps: int = 50,\n",
+ " guidance_scale: float = 2.5,\n",
+ " height: int = 1024,\n",
+ " width: int = 768,\n",
+ " generator=None,\n",
+ " eta=1.0,\n",
+ " **kwargs\n",
+ " ):\n",
+ " concat_dim = -1 # FIXME: y axis concat\n",
+ " # Prepare inputs to Tensor\n",
+ " image, condition_image = check_inputs_maskfree(image, condition_image, width, height)\n",
+ " \n",
+ " image = prepare_image(image).to(self.device, dtype=self.weight_dtype)\n",
+ " \n",
+ " condition_image = prepare_image(condition_image).to(self.device, dtype=self.weight_dtype)\n",
+ " \n",
+ " # Encode the image\n",
+ " image_latent = compute_vae_encodings(image, self.encoder)\n",
+ " condition_latent = compute_vae_encodings(condition_image, self.encoder)\n",
+ " \n",
+ " del image, condition_image\n",
+    "        # Concatenate latents\n",
+ " condition_latent_concat = torch.cat([image_latent, condition_latent], dim=concat_dim)\n",
+ " # Prepare noise\n",
+ " latents = randn_tensor(\n",
+ " condition_latent_concat.shape,\n",
+ " generator=generator,\n",
+ " device=condition_latent_concat.device,\n",
+ " dtype=self.weight_dtype,\n",
+ " )\n",
+ " # Prepare timesteps\n",
+ " self.noise_scheduler.set_inference_timesteps(num_inference_steps)\n",
+ " timesteps = self.noise_scheduler.timesteps\n",
+ " # latents = latents * self.noise_scheduler.init_noise_sigma\n",
+ " latents = self.noise_scheduler.add_noise(latents, timesteps[0])\n",
+ " \n",
+ " # Classifier-Free Guidance\n",
+ " if do_classifier_free_guidance := (guidance_scale > 1.0):\n",
+ " condition_latent_concat = torch.cat(\n",
+ " [\n",
+ " torch.cat([image_latent, torch.zeros_like(condition_latent)], dim=concat_dim),\n",
+ " condition_latent_concat,\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " num_warmup_steps = 0 # For simple DDPM, no warmup needed\n",
+ " with tqdm(total=num_inference_steps) as progress_bar:\n",
+ " for i, t in enumerate(timesteps):\n",
+ " # expand the latents if we are doing classifier free guidance\n",
+ " \n",
+ " latent_model_input = (torch.cat([latents] * 2) if do_classifier_free_guidance else latents)\n",
+ "\n",
+ " # prepare the input for the inpainting model\n",
+ " \n",
+ " p2p_latent_model_input = torch.cat([latent_model_input, condition_latent_concat], dim=1)\n",
+ " # predict the noise residual\n",
+ " \n",
+ " timestep = t.repeat(p2p_latent_model_input.shape[0])\n",
+ " time_embedding = get_time_embedding(timestep).to(self.device, dtype=self.weight_dtype)\n",
+ "\n",
+ " noise_pred = self.unet(\n",
+ " p2p_latent_model_input,\n",
+ " time_embedding\n",
+ " )\n",
+ " # perform guidance\n",
+ " if do_classifier_free_guidance:\n",
+ " noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)\n",
+ " noise_pred = noise_pred_uncond + guidance_scale * (\n",
+ " noise_pred_text - noise_pred_uncond\n",
+ " )\n",
+ " # compute the previous noisy sample x_t -> x_t-1\n",
+ " latents = self.noise_scheduler.step(\n",
+ " t, latents, noise_pred\n",
+ " )\n",
+ " # call the callback, if provided\n",
+ " if i == len(timesteps) - 1 or (\n",
+ " (i + 1) > num_warmup_steps\n",
+ " ):\n",
+ " progress_bar.update()\n",
+ "\n",
+ " # Decode the final latents\n",
+ " latents = latents.split(latents.shape[concat_dim] // 2, dim=concat_dim)[0]\n",
+ " # latents = 1 / self.vae.config.scaling_factor * latents\n",
+ " # image = self.vae.decode(latents.to(self.device, dtype=self.weight_dtype)).sample\n",
+ " image = self.decoder(latents.to(self.device, dtype=self.weight_dtype))\n",
+ " image = (image / 2 + 0.5).clamp(0, 1)\n",
+ " # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16\n",
+ " image = image.cpu().permute(0, 2, 3, 1).float().numpy()\n",
+ " image = numpy_to_pil(image)\n",
+ " \n",
+ " return image\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "5627b2d2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dataset vitonhd loaded, total 20 pairs.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 50/50 [00:07<00:00, 7.12it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.31it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.09it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.98it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.01it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.13it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.28it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 7.13it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.17it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.97it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.17it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.38it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.20it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.92it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.71it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.25it/s]\n",
+ "100%|██████████| 50/50 [00:06<00:00, 7.49it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.87it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.89it/s]\n",
+ "100%|██████████| 50/50 [00:07<00:00, 6.92it/s]\n",
+ "100%|██████████| 20/20 [02:26<00:00, 7.35s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import torch\n",
+ "import argparse\n",
+ "from torch.utils.data import DataLoader\n",
+ "from VITON_Dataset import VITONHDTestDataset\n",
+ "from tqdm import tqdm\n",
+ "from PIL import Image\n",
+ "\n",
+ "from utils import to_pil_image\n",
+ "\n",
+ "@torch.no_grad()\n",
+ "def main():\n",
+ " args=argparse.Namespace()\n",
+ " args.__dict__= {\n",
+ " \"dataset_name\": \"vitonhd\",\n",
+ " \"data_root_path\": \"./sample_dataset\",\n",
+ " \"output_dir\": \"./mask-free-output\",\n",
+ " \"seed\": 555,\n",
+ " \"batch_size\": 1,\n",
+ " \"num_inference_steps\": 50,\n",
+ " \"guidance_scale\": 2.5,\n",
+ " \"width\": 384,\n",
+ " \"height\": 512,\n",
+ " \"eval_pair\": False,\n",
+ " \"concat_eval_results\": True,\n",
+ " \"allow_tf32\": True,\n",
+ " \"dataloader_num_workers\": 4,\n",
+ " \"mixed_precision\": 'no',\n",
+ " \"concat_axis\": 'y',\n",
+ " \"enable_condition_noise\": True,\n",
+ " \"is_train\": False\n",
+ " }\n",
+ "\n",
+ " # Pipeline\n",
+ " pipeline = CatVTONPix2PixPipeline(\n",
+ " weight_dtype={\n",
+ " \"no\": torch.float32,\n",
+ " \"fp16\": torch.float16,\n",
+ " \"bf16\": torch.bfloat16,\n",
+ " }[args.mixed_precision],\n",
+ " device=\"cuda\",\n",
+ " skip_safety_check=True,\n",
+ " models=models,\n",
+ " )\n",
+ " # Dataset\n",
+ " if args.dataset_name == \"vitonhd\":\n",
+ " dataset = VITONHDTestDataset(args)\n",
+ " else:\n",
+    "        raise ValueError(f\"Invalid dataset name {args.dataset_name}.\")\n",
+ " print(f\"Dataset {args.dataset_name} loaded, total {len(dataset)} pairs.\")\n",
+ " dataloader = DataLoader(\n",
+ " dataset,\n",
+ " batch_size=args.batch_size,\n",
+ " shuffle=False,\n",
+ " num_workers=args.dataloader_num_workers\n",
+ " )\n",
+ " \n",
+ " # Inference\n",
+ " generator = torch.Generator(device='cuda').manual_seed(args.seed)\n",
+ " args.output_dir = os.path.join(args.output_dir, f\"{args.dataset_name}-{args.height}\", \"paired\" if args.eval_pair else \"unpaired\")\n",
+ " if not os.path.exists(args.output_dir):\n",
+ " os.makedirs(args.output_dir)\n",
+ " \n",
+ " for batch in tqdm(dataloader):\n",
+ " person_images = batch['person']\n",
+ " cloth_images = batch['cloth']\n",
+ "\n",
+ " results = pipeline(\n",
+ " person_images,\n",
+ " cloth_images,\n",
+ " num_inference_steps=args.num_inference_steps,\n",
+ " guidance_scale=args.guidance_scale,\n",
+ " height=args.height,\n",
+ " width=args.width,\n",
+ " generator=generator,\n",
+ " )\n",
+ " \n",
+ " if args.concat_eval_results:\n",
+ " person_images = to_pil_image(person_images)\n",
+ " cloth_images = to_pil_image(cloth_images)\n",
+ " for i, result in enumerate(results):\n",
+ " person_name = batch['person_name'][i]\n",
+ " output_path = os.path.join(args.output_dir, person_name)\n",
+ " if not os.path.exists(os.path.dirname(output_path)):\n",
+ " os.makedirs(os.path.dirname(output_path))\n",
+ " if args.concat_eval_results:\n",
+ " w, h = result.size\n",
+ " concated_result = Image.new('RGB', (w*3, h))\n",
+ " concated_result.paste(person_images[i], (0, 0))\n",
+ " concated_result.paste(cloth_images[i], (w, 0)) \n",
+ " concated_result.paste(result, (w*2, 0))\n",
+ " result = concated_result\n",
+ " result.save(output_path)\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " main()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "39537851",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "22fb6113",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1c374cc6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bddce5df",
+ "metadata": {
+ "vscode": {
+ "languageId": "markdown"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "harsh",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.18"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/output/vitonhd-512/unpaired/00654_00.jpg b/output/vitonhd-512/unpaired/00654_00.jpg
deleted file mode 100644
index beea9eae3f8bc99b552a2d490f250cfedb03bc73..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/00654_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/01265_00.jpg b/output/vitonhd-512/unpaired/01265_00.jpg
deleted file mode 100644
index 82c42b72e955cbdc3be36b874aa63a8dff2f0383..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/01265_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/01985_00.jpg b/output/vitonhd-512/unpaired/01985_00.jpg
deleted file mode 100644
index 33d1b52d4b9c371e830bec1315acbd1e00cd71af..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/01985_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/02023_00.jpg b/output/vitonhd-512/unpaired/02023_00.jpg
deleted file mode 100644
index 4b9c91de6ecfbe21877932bfaacf4b0ad34155d2..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/02023_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/02532_00.jpg b/output/vitonhd-512/unpaired/02532_00.jpg
deleted file mode 100644
index d98925454eb55ca11f2346c13b21728004b54083..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/02532_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/02944_00.jpg b/output/vitonhd-512/unpaired/02944_00.jpg
deleted file mode 100644
index c72ae4a41746ae43ac5b7699bf3cfa2ff1b5b22e..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/02944_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/03191_00.jpg b/output/vitonhd-512/unpaired/03191_00.jpg
deleted file mode 100644
index 0c7132663301f60ceccce0f3345a94ee53d990ce..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/03191_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/03921_00.jpg b/output/vitonhd-512/unpaired/03921_00.jpg
deleted file mode 100644
index 40c20bb982c4fad8c456d09967377d3f8aa72e85..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/03921_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/05006_00.jpg b/output/vitonhd-512/unpaired/05006_00.jpg
deleted file mode 100644
index f64062f354191a62c470516f008851e3272a70a4..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/05006_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/05378_00.jpg b/output/vitonhd-512/unpaired/05378_00.jpg
deleted file mode 100644
index 65f81e92a0434564627b80f8c10d610eace26813..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/05378_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/07342_00.jpg b/output/vitonhd-512/unpaired/07342_00.jpg
deleted file mode 100644
index fdfdcfa7c1b694d52102cb0f0fab0301a2d5bd74..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/07342_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/08088_00.jpg b/output/vitonhd-512/unpaired/08088_00.jpg
deleted file mode 100644
index 4a904a651d1bc42bf6396499645ef24bfbff145f..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/08088_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/08239_00.jpg b/output/vitonhd-512/unpaired/08239_00.jpg
deleted file mode 100644
index 5111327812216399b3d707e6296fa645f217d489..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/08239_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/08650_00.jpg b/output/vitonhd-512/unpaired/08650_00.jpg
deleted file mode 100644
index aee30d8cabe59bacd98ed946f6a658c887e70a1f..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/08650_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/08839_00.jpg b/output/vitonhd-512/unpaired/08839_00.jpg
deleted file mode 100644
index 21dc8c1fa420d984a8ba461744a39887cf3336cd..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/08839_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/11085_00.jpg b/output/vitonhd-512/unpaired/11085_00.jpg
deleted file mode 100644
index 9937e679570c67d1b5f448d078399c0b492553f2..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/11085_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/12345_00.jpg b/output/vitonhd-512/unpaired/12345_00.jpg
deleted file mode 100644
index fa9d559d97feedf8492339dee79aa57c2b390646..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/12345_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/12419_00.jpg b/output/vitonhd-512/unpaired/12419_00.jpg
deleted file mode 100644
index 06417a7a0bfbaa69f8381deb53febfe11baa7265..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/12419_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/12562_00.jpg b/output/vitonhd-512/unpaired/12562_00.jpg
deleted file mode 100644
index dbcfb7a75ab6036f40cba5a5fbf40819f7b54553..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/12562_00.jpg and /dev/null differ
diff --git a/output/vitonhd-512/unpaired/14651_00.jpg b/output/vitonhd-512/unpaired/14651_00.jpg
deleted file mode 100644
index 40c939137e392a1f3fc8bbb2bc7484b0691065ff..0000000000000000000000000000000000000000
Binary files a/output/vitonhd-512/unpaired/14651_00.jpg and /dev/null differ
diff --git a/requirements.txt b/requirements.txt
index e2aa599dac9716c009c00e7885d7d2ccfdd4cf81..e1602c3e2746793c384cf9232d445702e3520dd5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,90 +1,44 @@
-accelerate==1.9.0
-asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1733250440834/work
-beautifulsoup4==4.13.4
-certifi==2025.7.14
-charset-normalizer==3.4.2
-comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1733502965406/work
-contourpy==1.3.2
-cycler==0.12.1
-debugpy @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_debugpy_1752827112/work
-decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1740384970518/work
-diffusers==0.34.0
-exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1746947292760/work
-executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1745502089858/work
-filelock==3.18.0
-fonttools==4.59.0
-fsspec==2025.7.0
-gdown==5.2.0
-hf-xet==1.1.5
-huggingface-hub==0.33.4
-idna==3.10
-importlib_metadata @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_importlib-metadata_1747934053/work
-ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1719845459717/work
-ipython @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_ipython_1748711175/work
-jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1733300866624/work
-Jinja2==3.1.6
-jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1733440914442/work
-jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1748333051527/work
-kagglehub==0.3.12
-kiwisolver==1.4.8
-MarkupSafe==3.0.2
-matplotlib==3.10.3
-matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1733416936468/work
-mpmath==1.3.0
-nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1733325553580/work
-networkx==3.4.2
-numpy==2.2.6
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_packaging_1745345660/work
-pandas==2.3.1
-parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1733271261340/work
-pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1733301927746/work
-pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1733327343728/work
-pillow==11.3.0
-platformdirs @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_platformdirs_1746710438/work
-prompt_toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1744724089886/work
-psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1740663128538/work
-ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1733302279685/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=92c32ff62b5fd8cf325bec5ab90d7be3d2a8ca8c8a3813ff487a8d2002630d1f
-pure_eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1733569405015/work
-Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1750615794071/work
-pyparsing==3.2.3
-PySocks==1.7.1
-python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_python-dateutil_1751104122/work
-pytz==2025.2
-PyYAML==6.0.2
-pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1749898457097/work
-regex==2024.11.6
-requests==2.32.4
-safetensors==0.5.3
-six @ file:///home/conda/feedstock_root/build_artifacts/six_1733380938961/work
-soupsieve==2.7
-stack_data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1733569443808/work
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchsummary==1.5.1
-torchvision==0.22.1
-tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1748003301700/work
-tqdm==4.67.1
-traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1733367359838/work
-transformers==4.53.2
-triton==3.3.1
-typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_typing_extensions_1751643513/work
-tzdata==2025.2
-unzip==1.0.0
-urllib3==2.5.0
-wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1733231326287/work
-zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1749421620841/work
+# Minimal runtime dependencies (unpinned), derived from the imports used by
+# app.py and the inference notebooks; pin versions as needed.
+torch
+torchvision
+numpy
+pillow
+diffusers
+safetensors
+tqdm
+gradio
diff --git a/utils.py b/utils.py
index 72f397a47368b5313df8f66019c30020a600573b..2ae520dc3d861db006d05a27fcb3ba7409bdd60d 100644
--- a/utils.py
+++ b/utils.py
@@ -20,7 +20,7 @@ def get_time_embedding(timesteps):
timesteps = timesteps.unsqueeze(0)
# Shape: (160,)
- freqs = torch.pow(10000, -torch.arange(start=0, end=160, dtype=torch.float32) / 160)
+ freqs = torch.pow(10000, -torch.arange(start=0, end=160, dtype=torch.float32, device=timesteps.device) / 160)
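+    # Creating freqs on the same device as timesteps avoids a CPU/GPU mix-up in the outer product below.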
# Shape: (B, 160)
x = timesteps.float()[:, None] * freqs[None]
# Shape: (B, 320)
@@ -101,6 +101,12 @@ def check_inputs(image, condition_image, mask, width, height):
condition_image = resize_and_padding(condition_image, (width, height))
return image, condition_image, mask
+def check_inputs_maskfree(image, condition_image, width, height):
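+    # Tensor inputs are passed through unchanged; PIL inputs are resized/cropped (person)
+    # and resized/padded (garment) to the target size.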
+ if isinstance(image, torch.Tensor) and isinstance(condition_image, torch.Tensor):
+ return image, condition_image
+ image = resize_and_crop(image, (width, height))
+ condition_image = resize_and_padding(condition_image, (width, height))
+ return image, condition_image
def repaint_result(result, person_image, mask_image):
result, person, mask = np.array(result), np.array(person_image), np.array(mask_image)