{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "6d50f66c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model already downloaded.\n" ] } ], "source": [ "# check if the model is downloaded, if not download it\n", "import os\n", "if not os.path.exists(\"instruct-pix2pix-00-22000.ckpt\"):\n", " !wget https://huggingface.co/timbrooks/instruct-pix2pix/resolve/main/instruct-pix2pix-00-22000.ckpt\n", "else:\n", " print(\"Model already downloaded.\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "3598a305", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loaded finetuned weights from maskfree_finetuned_weights.safetensors\n", "Loading 0.in_proj.weight\n", "Loading 0.out_proj.weight\n", "Loading 0.out_proj.bias\n", "Loading 8.in_proj.weight\n", "Loading 8.out_proj.weight\n", "Loading 8.out_proj.bias\n", "Loading 16.in_proj.weight\n", "Loading 16.out_proj.weight\n", "Loading 16.out_proj.bias\n", "Loading 24.in_proj.weight\n", "Loading 24.out_proj.weight\n", "Loading 24.out_proj.bias\n", "Loading 32.in_proj.weight\n", "Loading 32.out_proj.weight\n", "Loading 32.out_proj.bias\n", "Loading 40.in_proj.weight\n", "Loading 40.out_proj.weight\n", "Loading 40.out_proj.bias\n", "Loading 48.in_proj.weight\n", "Loading 48.out_proj.weight\n", "Loading 48.out_proj.bias\n", "Loading 56.in_proj.weight\n", "Loading 56.out_proj.weight\n", "Loading 56.out_proj.bias\n", "Loading 64.in_proj.weight\n", "Loading 64.out_proj.weight\n", "Loading 64.out_proj.bias\n", "Loading 72.in_proj.weight\n", "Loading 72.out_proj.weight\n", "Loading 72.out_proj.bias\n", "Loading 80.in_proj.weight\n", "Loading 80.out_proj.weight\n", "Loading 80.out_proj.bias\n", "Loading 88.in_proj.weight\n", "Loading 88.out_proj.weight\n", "Loading 88.out_proj.bias\n", "Loading 96.in_proj.weight\n", "Loading 96.out_proj.weight\n", "Loading 96.out_proj.bias\n", "Loading 104.in_proj.weight\n", "Loading 104.out_proj.weight\n", "Loading 104.out_proj.bias\n", "Loading 112.in_proj.weight\n", "Loading 112.out_proj.weight\n", "Loading 112.out_proj.bias\n", "Loading 120.in_proj.weight\n", "Loading 120.out_proj.weight\n", "Loading 120.out_proj.bias\n", "\n", "Attention module weights loaded from {finetune_weights_path} successfully.\n" ] } ], "source": [ "import load_model\n", "\n", "models=load_model.preload_models_from_standard_weights(ckpt_path=\"instruct-pix2pix-00-22000.ckpt\", device=\"cuda\", finetune_weights_path=\"maskfree_finetuned_weights.safetensors\")" ] }, { "cell_type": "code", "execution_count": null, "id": "78e3d8b9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "id": "5627b2d2", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/mahesh/miniconda3/envs/harsh/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "ename": "AssertionError", "evalue": "File ./sample_dataset/samples_pairs.txt does not exist.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[3], line 97\u001b[0m\n\u001b[1;32m 94\u001b[0m result\u001b[38;5;241m.\u001b[39msave(output_path)\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 97\u001b[0m \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/harsh/lib/python3.10/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[3], line 48\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;66;03m# Dataset\u001b[39;00m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m args\u001b[38;5;241m.\u001b[39mdataset_name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvitonhd\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 48\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[43mVITONHDTestDataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid dataset name \u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs\u001b[38;5;241m.\u001b[39mdataset\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[0;32m~/harsh/stable-diffusion/VITON_Dataset.py:14\u001b[0m, in \u001b[0;36mInferenceDataset.__init__\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvae_processor \u001b[38;5;241m=\u001b[39m VaeImageProcessor(vae_scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m8\u001b[39m) \n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmask_processor \u001b[38;5;241m=\u001b[39m VaeImageProcessor(vae_scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m8\u001b[39m, do_normalize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, do_binarize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, do_convert_grayscale\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m) \n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/harsh/stable-diffusion/VITON_Dataset.py:36\u001b[0m, in \u001b[0;36mVITONHDTestDataset.load_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mload_data\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 35\u001b[0m name\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mis_train \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msamples\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 36\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(pair_txt\u001b[38;5;241m:=\u001b[39mos\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdata_root_path, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_pairs.txt\u001b[39m\u001b[38;5;124m'\u001b[39m)), \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFile \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpair_txt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not exist.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(pair_txt, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 38\u001b[0m lines \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mreadlines()\n", "\u001b[0;31mAssertionError\u001b[0m: File ./sample_dataset/samples_pairs.txt does not exist." ] } ], "source": [ "import os\n", "import torch\n", "import argparse\n", "from torch.utils.data import DataLoader\n", "from VITON_Dataset import VITONHDTestDataset\n", "from tqdm import tqdm\n", "from PIL import Image\n", "from CatVTON_model import CatVTONPix2PixPipeline\n", "\n", "from utils import to_pil_image\n", "\n", "@torch.no_grad()\n", "def main():\n", " args=argparse.Namespace()\n", " args.__dict__= {\n", " \"dataset_name\": \"vitonhd\",\n", " \"data_root_path\": \"./sample_dataset\",\n", " \"output_dir\": \"./mask-free-output\",\n", " \"seed\": 555,\n", " \"batch_size\": 1,\n", " \"num_inference_steps\": 50,\n", " \"guidance_scale\": 2.5,\n", " \"width\": 384,\n", " \"height\": 512,\n", " \"eval_pair\": False,\n", " \"concat_eval_results\": True,\n", " \"allow_tf32\": True,\n", " \"dataloader_num_workers\": 4,\n", " \"mixed_precision\": 'no',\n", " \"concat_axis\": 'y',\n", " \"enable_condition_noise\": True,\n", " \"is_train\": False\n", " }\n", "\n", " # Pipeline\n", " pipeline = CatVTONPix2PixPipeline(\n", " weight_dtype={\n", " \"no\": torch.float32,\n", " \"fp16\": torch.float16,\n", " \"bf16\": torch.bfloat16,\n", " }[args.mixed_precision],\n", " device=\"cuda\",\n", " skip_safety_check=True,\n", " models=models,\n", " )\n", " # Dataset\n", " if args.dataset_name == \"vitonhd\":\n", " dataset = VITONHDTestDataset(args)\n", " else:\n", " raise ValueError(f\"Invalid dataset name {args.dataset}.\")\n", " print(f\"Dataset {args.dataset_name} loaded, total {len(dataset)} pairs.\")\n", " dataloader = DataLoader(\n", " dataset,\n", " batch_size=args.batch_size,\n", " shuffle=False,\n", " num_workers=args.dataloader_num_workers\n", " )\n", " \n", " # Inference\n", " generator = torch.Generator(device='cuda').manual_seed(args.seed)\n", " args.output_dir = os.path.join(args.output_dir, f\"{args.dataset_name}-{args.height}\", \"paired\" if args.eval_pair else \"unpaired\")\n", " if not os.path.exists(args.output_dir):\n", " os.makedirs(args.output_dir)\n", " \n", " for batch in tqdm(dataloader):\n", " person_images = batch['person']\n", " cloth_images = batch['cloth']\n", "\n", " results = pipeline(\n", " person_images,\n", " cloth_images,\n", " num_inference_steps=args.num_inference_steps,\n", " guidance_scale=args.guidance_scale,\n", " height=args.height,\n", " width=args.width,\n", " generator=generator,\n", " )\n", " \n", " if args.concat_eval_results:\n", " person_images = to_pil_image(person_images)\n", " cloth_images = to_pil_image(cloth_images)\n", " for i, result in enumerate(results):\n", " person_name = batch['person_name'][i]\n", " output_path = os.path.join(args.output_dir, person_name)\n", " if not os.path.exists(os.path.dirname(output_path)):\n", " os.makedirs(os.path.dirname(output_path))\n", " if args.concat_eval_results:\n", " w, h = result.size\n", " concated_result = Image.new('RGB', (w*3, h))\n", " concated_result.paste(person_images[i], (0, 0))\n", " concated_result.paste(cloth_images[i], (w, 0)) \n", " concated_result.paste(result, (w*2, 0))\n", " result = concated_result\n", " result.save(output_path)\n", "\n", "if __name__ == \"__main__\":\n", " main()" ] }, { "cell_type": "code", "execution_count": null, "id": "39537851", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "22fb6113", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "1c374cc6", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "bddce5df", "metadata": { "vscode": { "languageId": "markdown" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "harsh", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.18" } }, "nbformat": 4, "nbformat_minor": 5 }