{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6d50f66c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model already downloaded.\n"
     ]
    }
   ],
   "source": [
    "# Check whether the model checkpoint is present; download it if it is not.\n",
    "import os\n",
    "\n",
    "ckpt_name = \"instruct-pix2pix-00-22000.ckpt\"\n",
    "ckpt_url = \"https://huggingface.co/timbrooks/instruct-pix2pix/resolve/main/\" + ckpt_name\n",
    "\n",
    "if os.path.exists(ckpt_name):\n",
    "    print(\"Model already downloaded.\")\n",
    "else:\n",
    "    # Fetch under a temporary name and rename only when wget succeeds, so an\n",
    "    # interrupted transfer is never mistaken for a complete checkpoint.\n",
    "    # -c lets a re-run resume the partial file instead of starting over.\n",
    "    partial_path = ckpt_name + \".part\"\n",
    "    !wget -c -O {partial_path} {ckpt_url} && mv {partial_path} {ckpt_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3598a305",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded finetuned weights from maskfree_finetuned_weights.safetensors\n",
      "Loading 0.in_proj.weight\n",
      "Loading 0.out_proj.weight\n",
      "Loading 0.out_proj.bias\n",
      "Loading 8.in_proj.weight\n",
      "Loading 8.out_proj.weight\n",
      "Loading 8.out_proj.bias\n",
      "Loading 16.in_proj.weight\n",
      "Loading 16.out_proj.weight\n",
      "Loading 16.out_proj.bias\n",
      "Loading 24.in_proj.weight\n",
      "Loading 24.out_proj.weight\n",
      "Loading 24.out_proj.bias\n",
      "Loading 32.in_proj.weight\n",
      "Loading 32.out_proj.weight\n",
      "Loading 32.out_proj.bias\n",
      "Loading 40.in_proj.weight\n",
      "Loading 40.out_proj.weight\n",
      "Loading 40.out_proj.bias\n",
      "Loading 48.in_proj.weight\n",
      "Loading 48.out_proj.weight\n",
      "Loading 48.out_proj.bias\n",
      "Loading 56.in_proj.weight\n",
      "Loading 56.out_proj.weight\n",
      "Loading 56.out_proj.bias\n",
      "Loading 64.in_proj.weight\n",
      "Loading 64.out_proj.weight\n",
      "Loading 64.out_proj.bias\n",
      "Loading 72.in_proj.weight\n",
      "Loading 72.out_proj.weight\n",
      "Loading 72.out_proj.bias\n",
      "Loading 80.in_proj.weight\n",
      "Loading 80.out_proj.weight\n",
      "Loading 80.out_proj.bias\n",
      "Loading 88.in_proj.weight\n",
      "Loading 88.out_proj.weight\n",
      "Loading 88.out_proj.bias\n",
      "Loading 96.in_proj.weight\n",
      "Loading 96.out_proj.weight\n",
      "Loading 96.out_proj.bias\n",
      "Loading 104.in_proj.weight\n",
      "Loading 104.out_proj.weight\n",
      "Loading 104.out_proj.bias\n",
      "Loading 112.in_proj.weight\n",
      "Loading 112.out_proj.weight\n",
      "Loading 112.out_proj.bias\n",
      "Loading 120.in_proj.weight\n",
      "Loading 120.out_proj.weight\n",
      "Loading 120.out_proj.bias\n",
      "\n",
      "Attention module weights loaded from {finetune_weights_path} successfully.\n"
     ]
    }
   ],
   "source": [
    "import load_model\n",
    "\n",
    "# Load the base checkpoint and the finetuned weights file in one call.\n",
    "# The resulting `models` object is consumed by the inference cell below.\n",
    "models = load_model.preload_models_from_standard_weights(\n",
    "    ckpt_path=\"instruct-pix2pix-00-22000.ckpt\",\n",
    "    device=\"cuda\",\n",
    "    finetune_weights_path=\"maskfree_finetuned_weights.safetensors\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78e3d8b9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5627b2d2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/mahesh/miniconda3/envs/harsh/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "ename": "AssertionError",
     "evalue": "File ./sample_dataset/samples_pairs.txt does not exist.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[3], line 97\u001b[0m\n\u001b[1;32m     94\u001b[0m             result\u001b[38;5;241m.\u001b[39msave(output_path)\n\u001b[1;32m     96\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 97\u001b[0m     \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/harsh/lib/python3.10/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m    114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    115\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[3], line 48\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m     46\u001b[0m \u001b[38;5;66;03m# Dataset\u001b[39;00m\n\u001b[1;32m     47\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m args\u001b[38;5;241m.\u001b[39mdataset_name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvitonhd\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 48\u001b[0m     dataset \u001b[38;5;241m=\u001b[39m \u001b[43mVITONHDTestDataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     49\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     50\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid dataset name \u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs\u001b[38;5;241m.\u001b[39mdataset\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "File \u001b[0;32m~/harsh/stable-diffusion/VITON_Dataset.py:14\u001b[0m, in \u001b[0;36mInferenceDataset.__init__\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvae_processor \u001b[38;5;241m=\u001b[39m VaeImageProcessor(vae_scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m8\u001b[39m) \n\u001b[1;32m     13\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmask_processor \u001b[38;5;241m=\u001b[39m VaeImageProcessor(vae_scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m8\u001b[39m, do_normalize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, do_binarize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, do_convert_grayscale\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m) \n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/harsh/stable-diffusion/VITON_Dataset.py:36\u001b[0m, in \u001b[0;36mVITONHDTestDataset.load_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     34\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mload_data\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m     35\u001b[0m     name\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mis_train \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msamples\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 36\u001b[0m     \u001b[38;5;28;01massert\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(pair_txt\u001b[38;5;241m:=\u001b[39mos\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdata_root_path, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_pairs.txt\u001b[39m\u001b[38;5;124m'\u001b[39m)), \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFile \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpair_txt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not exist.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     37\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(pair_txt, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m     38\u001b[0m         lines \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mreadlines()\n",
      "\u001b[0;31mAssertionError\u001b[0m: File ./sample_dataset/samples_pairs.txt does not exist."
     ]
    }
   ],
   "source": [
    "import os\n",
    "import torch\n",
    "import argparse\n",
    "from torch.utils.data import DataLoader\n",
    "from VITON_Dataset import VITONHDTestDataset\n",
    "from tqdm import tqdm\n",
    "from PIL import Image\n",
    "from CatVTON_model import CatVTONPix2PixPipeline\n",
    "\n",
    "from utils import to_pil_image\n",
    "\n",
    "@torch.no_grad()\n",
    "def main():\n",
    "    \"\"\"Run virtual try-on inference over the configured dataset and save the images.\n",
    "\n",
    "    Builds a CatVTONPix2PixPipeline from the notebook-level `models` object\n",
    "    (created by the model-loading cell), feeds every person/cloth batch through\n",
    "    it in dataset order, and writes one image per person under\n",
    "    `<output_dir>/<dataset_name>-<height>/<paired|unpaired>/`.\n",
    "\n",
    "    When `concat_eval_results` is enabled, each saved image is the person,\n",
    "    cloth and generated result pasted side by side.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `dataset_name` is not a supported dataset.\n",
    "    \"\"\"\n",
    "    # The whole namespace is handed to the dataset class, so keys that are not\n",
    "    # read in this function may still be consumed there.\n",
    "    # NOTE(review): `allow_tf32` is not applied to any torch backend setting in\n",
    "    # this cell - confirm whether that is intended.\n",
    "    args = argparse.Namespace(\n",
    "        dataset_name=\"vitonhd\",\n",
    "        data_root_path=\"./sample_dataset\",\n",
    "        output_dir=\"./mask-free-output\",\n",
    "        seed=555,\n",
    "        batch_size=1,\n",
    "        num_inference_steps=50,\n",
    "        guidance_scale=2.5,\n",
    "        width=384,\n",
    "        height=512,\n",
    "        eval_pair=False,\n",
    "        concat_eval_results=True,\n",
    "        allow_tf32=True,\n",
    "        dataloader_num_workers=4,\n",
    "        mixed_precision='no',\n",
    "        concat_axis='y',\n",
    "        enable_condition_noise=True,\n",
    "        is_train=False,\n",
    "    )\n",
    "\n",
    "    # Pipeline\n",
    "    dtype_by_precision = {\n",
    "        \"no\": torch.float32,\n",
    "        \"fp16\": torch.float16,\n",
    "        \"bf16\": torch.bfloat16,\n",
    "    }\n",
    "    pipeline = CatVTONPix2PixPipeline(\n",
    "        weight_dtype=dtype_by_precision[args.mixed_precision],\n",
    "        device=\"cuda\",\n",
    "        skip_safety_check=True,\n",
    "        models=models,\n",
    "    )\n",
    "\n",
    "    # Dataset\n",
    "    if args.dataset_name != \"vitonhd\":\n",
    "        # Report the attribute that actually exists on the namespace; the old\n",
    "        # message read `args.dataset` and failed with AttributeError instead.\n",
    "        raise ValueError(f\"Invalid dataset name {args.dataset_name}.\")\n",
    "    dataset = VITONHDTestDataset(args)\n",
    "    print(f\"Dataset {args.dataset_name} loaded, total {len(dataset)} pairs.\")\n",
    "    dataloader = DataLoader(\n",
    "        dataset,\n",
    "        batch_size=args.batch_size,\n",
    "        shuffle=False,\n",
    "        num_workers=args.dataloader_num_workers,\n",
    "    )\n",
    "\n",
    "    # Inference\n",
    "    generator = torch.Generator(device='cuda').manual_seed(args.seed)\n",
    "    args.output_dir = os.path.join(\n",
    "        args.output_dir,\n",
    "        f\"{args.dataset_name}-{args.height}\",\n",
    "        \"paired\" if args.eval_pair else \"unpaired\",\n",
    "    )\n",
    "    os.makedirs(args.output_dir, exist_ok=True)\n",
    "\n",
    "    for batch in tqdm(dataloader):\n",
    "        person_images = batch['person']\n",
    "        cloth_images = batch['cloth']\n",
    "\n",
    "        results = pipeline(\n",
    "            person_images,\n",
    "            cloth_images,\n",
    "            num_inference_steps=args.num_inference_steps,\n",
    "            guidance_scale=args.guidance_scale,\n",
    "            height=args.height,\n",
    "            width=args.width,\n",
    "            generator=generator,\n",
    "        )\n",
    "\n",
    "        if args.concat_eval_results:\n",
    "            # Convert the inputs once per batch so they can be pasted next to each result.\n",
    "            person_images = to_pil_image(person_images)\n",
    "            cloth_images = to_pil_image(cloth_images)\n",
    "\n",
    "        for i, result in enumerate(results):\n",
    "            person_name = batch['person_name'][i]\n",
    "            output_path = os.path.join(args.output_dir, person_name)\n",
    "            # person_name may contain sub-directories; create them on demand.\n",
    "            os.makedirs(os.path.dirname(output_path), exist_ok=True)\n",
    "            if args.concat_eval_results:\n",
    "                # Lay out person | cloth | result on one canvas three results wide.\n",
    "                w, h = result.size\n",
    "                concated_result = Image.new('RGB', (w * 3, h))\n",
    "                concated_result.paste(person_images[i], (0, 0))\n",
    "                concated_result.paste(cloth_images[i], (w, 0))\n",
    "                concated_result.paste(result, (w * 2, 0))\n",
    "                result = concated_result\n",
    "            result.save(output_path)\n",
    "\n",
    "# Entry point: run inference when this cell is executed.\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39537851",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "22fb6113",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c374cc6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bddce5df",
   "metadata": {
    "vscode": {
     "languageId": "markdown"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "harsh",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}