+
EditP23: 3D Editing via Propagation of Image Prompts to Multi-View
+
+
+ This is the official Gradio demo for EditP23, a method for fast, mask-free 3D editing that propagates 2D image edits to multi-view representations in a 3D-consistent manner. The edit is guided by an image pair, allowing users to leverage any preferred 2D editing tool, from manual painting to generative pipelines.
+
+
+"""
+
+HOW_TO_USE_TEXT = """
+
+
+
+EditP23 requires three specific images to perform an edit. This demo automates the process, but understanding each component is key.
+
+ - Original Multi-View Image (`src_mv.png`): This is a 2x3 grid of six different views of the original, unedited object. The model uses this as the base to apply the edit consistently across all angles.
+ - Source Condition (`src.png`): This is a single, frontal view of the original object. It acts as the "before" image for the edit.
+ - Target Condition (`edited.png`): This is the "after" image. It's the same view as `src.png`, but with your desired 2D modification applied. The difference between this image and `src.png` is what guides the 3D edit.
+
+
+
+## How to Prepare Your Own Images
+
+You can generate the required input images using the helper scripts provided in our GitHub repository.
+
+### Step 1: Generate `src.png` and `src_mv.png`
+
+You have two options for creating the initial views of your object.
+
+
+### Step 2: Create `edited.png`
+
+Use any 2D image editor to modify your `src.png`. This is where your creativity comes in! For quick edits, we recommend these online tools:
+
+ - FlowEdit: Excellent for global, structural edits.
+ - Flux-Inpainting: Great for local modifications and inpainting.
+
+
+
+## Understanding the Parameters
+
+ - `n_max`: Controls how many denoising steps are influenced by your edit. Higher values are needed for more significant geometric changes.
+ - `tar_guidance_scale`: Determines the strength of your edit. Increase this for more dramatic changes, but be aware that very high values can sometimes introduce artifacts.
+ - `src_guidance_scale`: Controls how strongly the model adheres to the original object's identity. This can usually be left at its default value.
+
+
+
+## Reconstructing a 3D Model
+
+After this demo generates an edited multi-view image, you can use the `scripts/recon.py` script from our repository to convert it back into a 3D model (`.obj` file).
+
+```bash
+python scripts/recon.py path/to/instant-mesh-large.yaml --input_file "path/to/edited_mv.png" --output_dir "path/to/output/"
+```
+
+"""
+
+# --- Gradio UI Layout ---
+# Custom theme built on gr.themes.Base to match the website's color: blue for both hues, Inter as the first font.
+theme = gr.themes.Base(
+ primary_hue=gr.themes.colors.blue,
+ secondary_hue=gr.themes.colors.blue,  # same palette as the primary hue
+ font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],  # Inter first, then generic font names
+).set(
+ button_primary_background_fill="*primary_500",  # "*primary_500" presumably refers to a shade of primary_hue; confirm in the Gradio theming docs
+ button_primary_background_fill_hover="*primary_600",  # hover state uses the 600 step instead of 500
+)
+
+# Custom CSS for better layout and fixing UI quirks
+CUSTOM_CSS = """
+.gradio-container { max-width: 95% !important; }
+.label-wrap { padding-top: 6px !import ant; } /* Fix label overlap */
+.help-text { color: #9CA3AF; font-size: 0.9rem; margin-top: 4px; margin-bottom: 12px; }
+.link-button { text-decoration: none; color: white; padding: 8px 16px; border-radius: 8px; font-weight: bold; transition: background-color 0.2s ease; }
+.link-button:hover { background-color: #4a5568 !important; }
+#action-buttons { margin-top: 1rem; }
+
+/* --- CSS Rules for the Examples Table --- */
+
+/* 1. CRITICAL FIX: Target the image's wrapper to prevent clipping. */
+#example-table td > div {
+ overflow: visible !important; /* This is the key to stop cropping. */
+ display: flex;
+ justify-content: center;
+ align-items: center;
+}
+
+/* 2. General cell styling for alignment and spacing */
+#example-table td {
+ vertical-align: middle !important;
+ padding: 8px !important;
+}
+
+/* 3. Force parameter columns (4-7) to have the same width */
+#example-table th:nth-child(n+4):nth-child(-n+7),
+#example-table td:nth-child(n+4):nth-child(-n+7) {
+ width: 85px !important;
+ max-width: 85px !important;
+ text-align: center;
+ word-break: break-word;
+}
+
+/* 4. Enlarge multi-view image (Col 1) with a 3:2 height:width ratio */
+#example-table td:nth-child(1) img {
+ height: 180px !important;
+ width: 120px !important; /* 180px / 120px = 3:2 ratio */
+ object-fit: contain !important; /* Ensures the whole image is visible */
+}
+
+/* 5. Enlarge condition images (Col 2 & 3) */
+#example-table td:nth-child(2) img,
+#example-table td:nth-child(3) img {
+ height: 150px !important;
+ width: 150px !important;
+ object-fit: contain !important;
+}
+"""
+
+
+with gr.Blocks(theme=theme, css=CUSTOM_CSS) as demo:
+ gr.Markdown(ABOUT_TEXT)
+
+ with gr.Tabs() as tabs:
+ with gr.TabItem("Interactive Demo", id=0):
+ with gr.Row(variant="panel", equal_height=False):
+ # Column 1: the three input images (multi-view grid, source condition, target condition)
+ with gr.Column(scale=1):
+ gr.Markdown("### 1. Input Images")
+ gr.Markdown(
+ 'See the "How to Use" tab for details on generating the **Multi-View Image** and creating your own **Edited Condition**.',
+ elem_classes="help-text",  # styled by the .help-text rule in CUSTOM_CSS
+ )
+ original_mv_image = gr.Image(  # shown first here, but passed third in run_button.click's inputs
+ type="numpy",
+ label="Original Multi-View Image (src_mv.png)",
+ height=675,  # 675 / 450 = 3:2 height:width, the same ratio the examples-table CSS uses for this image
+ width=450,
+ )
+ with gr.Row():  # presumably holds both condition images side by side; nesting is not visible in this view
+ src_cond_image = gr.Image(  # the "before" view; first input of run_button.click
+ type="numpy",
+ label="Source Condition (src.png)",
+ height=350,
+ width=350,
+ )
+ tgt_cond_image = gr.Image(  # the "after" (edited) view; second input of run_button.click
+ type="numpy",
+ label="Target Condition (edited.png)",
+ height=350,
+ width=350,
+ )
+
+ # Column 2: Parameters & Action (five sliders, then the Clear / Generate buttons)
+ with gr.Column(scale=1, min_width=300):
+ gr.Markdown("### 2. Parameters")
+ with gr.Accordion("Advanced Parameters", open=True):  # open=True: presumably rendered expanded; confirm in the Gradio docs
+ t_steps = gr.Slider(
+ minimum=1,
+ maximum=100,
+ value=50,  # every bundled example row also uses 50
+ step=1,
+ label="T_steps",
+ info="Total number of denoising steps.",
+ )
+ n_max = gr.Slider(
+ minimum=1,
+ maximum=50,  # fixed cap; not linked to the current T_steps value
+ value=31,  # bundled examples use values between 31 and 42
+ step=1,
+ label="n_max",
+ info="Number of scheduler steps for edit-aware guidance. Increase for more significant edits.",
+ )
+ src_gs = gr.Slider(
+ minimum=1.0,
+ maximum=10.0,
+ value=3.5,  # every bundled example row also uses 3.5
+ step=0.1,
+ label="Source CFG",
+ info="Guidance scale for the source condition. Can typically remain constant.",
+ )
+ tar_gs = gr.Slider(
+ minimum=1.0,
+ maximum=30.0,
+ value=5.0,  # bundled examples use values between 5.0 and 21.0
+ step=0.1,
+ label="Target CFG",
+ info="Guidance scale for the target condition. Increase for more significant edits.",
+ )
+ seed = gr.Slider(
+ minimum=0,
+ maximum=10000,
+ value=18,  # every bundled example row also lists 18 as its last value
+ step=1,
+ label="Seed",
+ info="Random seed for reproducibility.",
+ )
+
+ with gr.Row(elem_id="action-buttons"):  # elem_id matches the #action-buttons rule in CUSTOM_CSS
+ clear_button = gr.Button("Clear", variant="secondary", scale=1)  # wired to clear_inputs further down
+ run_button = gr.Button("Generate", variant="primary", scale=2)  # wired to run_main_script further down
+
+ # Column 3: Output (read-only result image plus a hint about 3D reconstruction)
+ with gr.Column(scale=2, min_width=350):
+ gr.Markdown("### 3. Output Image")
+ output_image = gr.Image(  # target of run_button.click; also listed last in clear_button.click's outputs
+ type="pil",
+ label="Edited Result",
+ height=450,
+ width=450,
+ interactive=False,  # display-only component
+ )
+ gr.Markdown(
+ 'After generating, you can use the `recon.py` script to create a 3D model. See the "How to Use" tab for the full command.',
+ elem_classes="help-text",  # styled by the .help-text rule in CUSTOM_CSS
+ )
+
+ # --- Examples Section ---
+ if os.path.exists(EXAMPLES_PATH):
+ gr.Markdown("---")
+ gr.Markdown("### Click an Example to Load")
+
+ example_inputs = [
+ original_mv_image,
+ src_cond_image,
+ tgt_cond_image,
+ t_steps,
+ n_max,
+ src_gs,
+ tar_gs,
+ ]
+
+ example_data = [
+ [
+ os.path.join(EXAMPLES_PATH, "bike_vintage", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_vintage", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_vintage", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "robot_sunglasses", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "robot_sunglasses", "src.png"),
+ os.path.join(EXAMPLES_PATH, "robot_sunglasses", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "stormtrooper_donut", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "stormtrooper_donut", "src.png"),
+ os.path.join(EXAMPLES_PATH, "stormtrooper_donut", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "figure_zombie", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "figure_zombie", "src.png"),
+ os.path.join(EXAMPLES_PATH, "figure_zombie", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "deer_pixar", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "deer_pixar", "src.png"),
+ os.path.join(EXAMPLES_PATH, "deer_pixar", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "german-shep_plush", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_plush", "src.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_plush", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "deer_wings", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "deer_wings", "src.png"),
+ os.path.join(EXAMPLES_PATH, "deer_wings", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "batman_jetpack", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "batman_jetpack", "src.png"),
+ os.path.join(EXAMPLES_PATH, "batman_jetpack", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "bike_sport", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_sport", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_sport", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "red-dragon_tail", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "red-dragon_tail", "src.png"),
+ os.path.join(EXAMPLES_PATH, "red-dragon_tail", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "cake_oreo", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cake_oreo", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cake_oreo", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "bike_harley", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_harley", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_harley", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "bike_modern", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_modern", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_modern", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "bmw_speedy", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bmw_speedy", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bmw_speedy", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "batman_backpack", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "batman_backpack", "src.png"),
+ os.path.join(EXAMPLES_PATH, "batman_backpack", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "figure_backpack", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "figure_backpack", "src.png"),
+ os.path.join(EXAMPLES_PATH, "figure_backpack", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "car_cartoon", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "car_cartoon", "src.png"),
+ os.path.join(EXAMPLES_PATH, "car_cartoon", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "car_engine", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "car_engine", "src.png"),
+ os.path.join(EXAMPLES_PATH, "car_engine", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "car_steampunk", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "car_steampunk", "src.png"),
+ os.path.join(EXAMPLES_PATH, "car_steampunk", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "green-dragon_skirt", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "green-dragon_skirt", "src.png"),
+ os.path.join(EXAMPLES_PATH, "green-dragon_skirt", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_pagoda", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_pagoda", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_pagoda", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "oasis_magical", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "oasis_magical", "src.png"),
+ os.path.join(EXAMPLES_PATH, "oasis_magical", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "cabin_alpine", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_alpine", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_alpine", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "cabin_gothic", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_gothic", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_gothic", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "fox_tuxedo", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "fox_tuxedo", "src.png"),
+ os.path.join(EXAMPLES_PATH, "fox_tuxedo", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "cabin_haunted", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_haunted", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_haunted", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "fox_eyes", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "fox_eyes", "src.png"),
+ os.path.join(EXAMPLES_PATH, "fox_eyes", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_disney", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_disney", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_disney", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "desk_wizard", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "desk_wizard", "src.png"),
+ os.path.join(EXAMPLES_PATH, "desk_wizard", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_light", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_light", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_light", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_roof", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_roof", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_roof", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_earphones", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_earphones", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_earphones", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_rust", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_rust", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_rust", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "german-shep_pixar", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_pixar", "src.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_pixar", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_kimono", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_kimono", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_kimono", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "ship_fantasy", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "ship_fantasy", "src.png"),
+ os.path.join(EXAMPLES_PATH, "ship_fantasy", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_lego-fig", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_lego-fig", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_lego-fig", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "nurse_sporty", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "nurse_sporty", "src.png"),
+ os.path.join(EXAMPLES_PATH, "nurse_sporty", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "r2d2_golden", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "r2d2_golden", "src.png"),
+ os.path.join(EXAMPLES_PATH, "r2d2_golden", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_the-force", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_the-force", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_the-force", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "spiderbot_chrome", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "spiderbot_chrome", "src.png"),
+ os.path.join(EXAMPLES_PATH, "spiderbot_chrome", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(
+ EXAMPLES_PATH, "spiderbot_steampunk", "src_mv.png"
+ ),
+ os.path.join(EXAMPLES_PATH, "spiderbot_steampunk", "src.png"),
+ os.path.join(
+ EXAMPLES_PATH, "spiderbot_steampunk", "edited.png"
+ ),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "superman_crossed", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "superman_crossed", "src.png"),
+ os.path.join(EXAMPLES_PATH, "superman_crossed", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ ]
+
+ gr.Examples(  # clickable table of the bundled examples
+ examples=example_data,  # one row per example, built above
+ inputs=example_inputs,  # components that receive a clicked row's values
+ label="Example Edits",
+ examples_per_page=10,
+ elem_id="example-table"  # hook for the #example-table rules in CUSTOM_CSS
+
+ )
+
+ with gr.TabItem("How to Use", id=1):
+ gr.Markdown(HOW_TO_USE_TEXT)
+
+ # Define button actions: "Generate" calls run_main_script and shows its result in output_image
+ run_button.click(
+ fn=run_main_script,  # defined outside this view
+ inputs=[  # order differs from the on-screen order; presumably matches run_main_script's parameters, verify against its definition
+ src_cond_image,
+ tgt_cond_image,
+ original_mv_image,
+ t_steps,
+ n_max,
+ src_gs,
+ tar_gs,
+ seed,
+ ],
+ outputs=output_image,
+ )
+
+ clear_button.click(  # "Clear" button handler
+ fn=clear_inputs,  # defined outside this view
+ inputs=[],  # the handler takes no component values
+ outputs=[  # nine components; presumably clear_inputs returns one value per entry, in this order; verify against its definition
+ original_mv_image,
+ src_cond_image,
+ tgt_cond_image,
+ t_steps,
+ n_max,
+ src_gs,
+ tar_gs,
+ seed,
+ output_image,
+ ],
+ )
+
+if __name__ == "__main__":  # only launch the UI when the file is run as a script
+ demo.launch(share=True)  # NOTE(review): share=True presumably requests a public share link; confirm this is intended for the deployment target
diff --git a/assets/stormtrooper.glb b/assets/stormtrooper.glb
new file mode 100644
index 0000000000000000000000000000000000000000..a61e292d4e58e8f90da0c3889e12f1ce9590ea8b
--- /dev/null
+++ b/assets/stormtrooper.glb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:921f8656fac3332d100969f40724455b7d34565625cf2a29d9e36f6c81b1d1c9
+size 1659928
diff --git a/examples/batman_backpack/edited.png b/examples/batman_backpack/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..e549a6372d87bcb2a169a7860c65db347a5e54f8
--- /dev/null
+++ b/examples/batman_backpack/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b931e0b36421524569533d2076f7885050f376c8a89978ee5b6c5fb91364ebbf
+size 211819
diff --git a/examples/batman_backpack/src.png b/examples/batman_backpack/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..7b9cc64faf353aee3c7e2d9cecdcda73506fe663
--- /dev/null
+++ b/examples/batman_backpack/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0829cda6608dbcc1de82f45c3887fdcfbdf4f5271976ca67b3d3d815ccebf018
+size 94721
diff --git a/examples/batman_backpack/src_mv.png b/examples/batman_backpack/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b069016e67984c8f504a0521b4f62fcceb1f4ee
--- /dev/null
+++ b/examples/batman_backpack/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6309e40375714b89f843d3623e00150c79add3d417a8b04317539fcaf3d997e0
+size 171122
diff --git a/examples/batman_jetpack/edited.png b/examples/batman_jetpack/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5f671ba33a2eb7e56ee894692948ba9dae119e9a
--- /dev/null
+++ b/examples/batman_jetpack/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b312bfaeb098b1fe45ed61824b6e0a9a956e6f8969d880347eb4ac17512a0ac6
+size 223972
diff --git a/examples/batman_jetpack/src.png b/examples/batman_jetpack/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..7b9cc64faf353aee3c7e2d9cecdcda73506fe663
--- /dev/null
+++ b/examples/batman_jetpack/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0829cda6608dbcc1de82f45c3887fdcfbdf4f5271976ca67b3d3d815ccebf018
+size 94721
diff --git a/examples/batman_jetpack/src_mv.png b/examples/batman_jetpack/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b069016e67984c8f504a0521b4f62fcceb1f4ee
--- /dev/null
+++ b/examples/batman_jetpack/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6309e40375714b89f843d3623e00150c79add3d417a8b04317539fcaf3d997e0
+size 171122
diff --git a/examples/bike_harley/edited.png b/examples/bike_harley/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ce97c8c8db1e513b5c6a6e6e37463d42ff4ae969
--- /dev/null
+++ b/examples/bike_harley/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b810cc2f73c107c698f03c2bd8ad4d5e13646ba5a327818670a98c1c7cb96da4
+size 164991
diff --git a/examples/bike_harley/src.png b/examples/bike_harley/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..6392df028a8211c8e3b97e85bf75cfc19e9e3ec5
--- /dev/null
+++ b/examples/bike_harley/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d122fa2e465bf5cfe1856cf913e029fee66d2229ea7b8a5f77254f0acaa44622
+size 180509
diff --git a/examples/bike_harley/src_mv.png b/examples/bike_harley/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69da597de7ffbf963a1e434ea36bcdabd7d8ad09
--- /dev/null
+++ b/examples/bike_harley/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343367a134e514ac65e3d06181e61a944ddf86112b52054ba2719383129e1862
+size 356471
diff --git a/examples/bike_modern/edited.png b/examples/bike_modern/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5ba510dfb1d6b13adbf757414ed49b2ea3f29bce
--- /dev/null
+++ b/examples/bike_modern/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39e655a517446a880a7d9063fe271469bca3e6f7f0a8795f4e2c6d4596b195b9
+size 163977
diff --git a/examples/bike_modern/src.png b/examples/bike_modern/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..6392df028a8211c8e3b97e85bf75cfc19e9e3ec5
--- /dev/null
+++ b/examples/bike_modern/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d122fa2e465bf5cfe1856cf913e029fee66d2229ea7b8a5f77254f0acaa44622
+size 180509
diff --git a/examples/bike_modern/src_mv.png b/examples/bike_modern/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69da597de7ffbf963a1e434ea36bcdabd7d8ad09
--- /dev/null
+++ b/examples/bike_modern/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343367a134e514ac65e3d06181e61a944ddf86112b52054ba2719383129e1862
+size 356471
diff --git a/examples/bike_sport/edited.png b/examples/bike_sport/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..60d18e3f77d73749767f6c38e2710456a929d7d7
--- /dev/null
+++ b/examples/bike_sport/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:337d0f6dbe4df38020aa3de73d6acce82801f7efdcaedd37f297d9ec223e6cc4
+size 101239
diff --git a/examples/bike_sport/src.png b/examples/bike_sport/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..416489aa955a408046eb41474123ddd9f47c6412
--- /dev/null
+++ b/examples/bike_sport/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a161a0125c23afc5f79dced0ff10055722fe6379c54e9324aeed894a09ca613c
+size 91702
diff --git a/examples/bike_sport/src_mv.png b/examples/bike_sport/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..5fef5a894cdab38080e16f7a4b2ac94e98ea39b2
--- /dev/null
+++ b/examples/bike_sport/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f2c13a76e416db634e873ecd9b5c492c42e14e4c3310ee1583851c829582b32
+size 290624
diff --git a/examples/bike_vintage/edited.png b/examples/bike_vintage/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..6116d5530c6659504e387c9957d6a804177262c4
--- /dev/null
+++ b/examples/bike_vintage/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8230a205104ffa813e9231068e805be17bbe6c5efb8e41a4dc4a489c5e9206c
+size 171361
diff --git a/examples/bike_vintage/src.png b/examples/bike_vintage/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..6392df028a8211c8e3b97e85bf75cfc19e9e3ec5
--- /dev/null
+++ b/examples/bike_vintage/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d122fa2e465bf5cfe1856cf913e029fee66d2229ea7b8a5f77254f0acaa44622
+size 180509
diff --git a/examples/bike_vintage/src_mv.png b/examples/bike_vintage/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69da597de7ffbf963a1e434ea36bcdabd7d8ad09
--- /dev/null
+++ b/examples/bike_vintage/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343367a134e514ac65e3d06181e61a944ddf86112b52054ba2719383129e1862
+size 356471
diff --git a/examples/bmw_speedy/edited.png b/examples/bmw_speedy/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..821408fa220720ca1b13c1b86e77ccb7ad9a8a6b
--- /dev/null
+++ b/examples/bmw_speedy/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d45865fba033e4e599ab4a35198aac39e00bc00404147531bf37bdf25f5cf598
+size 91057
diff --git a/examples/bmw_speedy/src.png b/examples/bmw_speedy/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..0347ce26045ba3becca8a287a477d3ed2e39e71f
--- /dev/null
+++ b/examples/bmw_speedy/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0b7c8e768928d36db07363dd2a5cc7d0df6895118a4a57d1ad417f2dca9cccd
+size 108813
diff --git a/examples/bmw_speedy/src_mv.png b/examples/bmw_speedy/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..22512abbf452fa813779398ca0692094a0be436c
--- /dev/null
+++ b/examples/bmw_speedy/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d914de8ed583b5cf016dc4b601dc72b62f6999885c289a745ee09d86903dbda
+size 178455
diff --git a/examples/cabin_alpine/edited.png b/examples/cabin_alpine/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a9c7aecbe26966206d3ee6967670f870315d88f4
--- /dev/null
+++ b/examples/cabin_alpine/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be31a56b0aeea55dc8d71c23919017f8ad0c9a1d1c0b67370f37f427df93271c
+size 859027
diff --git a/examples/cabin_alpine/src.png b/examples/cabin_alpine/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..313357f8d4f1bfe27f4df2a417b8c339fcea7d58
--- /dev/null
+++ b/examples/cabin_alpine/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0655a2cca2e8b5e9590228252c239079963425d5f6d9f5ccc698fd78ed02687a
+size 304497
diff --git a/examples/cabin_alpine/src_mv.png b/examples/cabin_alpine/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e6ea4588be0de350c07e03c53ac8dea49955a34
--- /dev/null
+++ b/examples/cabin_alpine/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b211a612b264cb26d0b18f2a80f5273716f4d7d5b86a669f4c4ab8cf4167ff6
+size 519695
diff --git a/examples/cabin_gothic/edited.png b/examples/cabin_gothic/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..3c8878e47107524d8d33f9ccbe5c54281ca2f9c0
--- /dev/null
+++ b/examples/cabin_gothic/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7efc56a9f254cda00ec54ce93bec15335ecfe5f6bf10911a262fcd783f2bb8d
+size 881495
diff --git a/examples/cabin_gothic/src.png b/examples/cabin_gothic/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..313357f8d4f1bfe27f4df2a417b8c339fcea7d58
--- /dev/null
+++ b/examples/cabin_gothic/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0655a2cca2e8b5e9590228252c239079963425d5f6d9f5ccc698fd78ed02687a
+size 304497
diff --git a/examples/cabin_gothic/src_mv.png b/examples/cabin_gothic/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e6ea4588be0de350c07e03c53ac8dea49955a34
--- /dev/null
+++ b/examples/cabin_gothic/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b211a612b264cb26d0b18f2a80f5273716f4d7d5b86a669f4c4ab8cf4167ff6
+size 519695
diff --git a/examples/cabin_haunted/edited.png b/examples/cabin_haunted/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..446e3e524dfce77f47b745389d569634d4c338a0
--- /dev/null
+++ b/examples/cabin_haunted/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ce7a3b5abda615bd3f1e9694ba0d8925c912ef249b3f0127e7228f29a9ece44
+size 270283
diff --git a/examples/cabin_haunted/src.png b/examples/cabin_haunted/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..313357f8d4f1bfe27f4df2a417b8c339fcea7d58
--- /dev/null
+++ b/examples/cabin_haunted/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0655a2cca2e8b5e9590228252c239079963425d5f6d9f5ccc698fd78ed02687a
+size 304497
diff --git a/examples/cabin_haunted/src_mv.png b/examples/cabin_haunted/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e6ea4588be0de350c07e03c53ac8dea49955a34
--- /dev/null
+++ b/examples/cabin_haunted/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b211a612b264cb26d0b18f2a80f5273716f4d7d5b86a669f4c4ab8cf4167ff6
+size 519695
diff --git a/examples/cake_oreo/edited.png b/examples/cake_oreo/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5c9d7ad6fd6fe8c835067fcd7200c82468946e8b
--- /dev/null
+++ b/examples/cake_oreo/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe262f65660d656341c7780a5ce444eae965980abbba45d44754fde07e6d976f
+size 475666
diff --git a/examples/cake_oreo/src.png b/examples/cake_oreo/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..37db4cb3dce06e1b170948d5d309a312caedfb32
--- /dev/null
+++ b/examples/cake_oreo/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bab8e40296d3df9d572c43b494a3cf7de0374c9209a69765a91e00f4c4c2cba3
+size 251020
diff --git a/examples/cake_oreo/src_mv.png b/examples/cake_oreo/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..f8639de197db8713c8fcadcd47c07a308e7c8be1
--- /dev/null
+++ b/examples/cake_oreo/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8b8961a2dad370cf1dc548aed6d8e563c84f895027095d95872c565c3a7673a
+size 427688
diff --git a/examples/car_cartoon/edited.png b/examples/car_cartoon/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca726c7ea8fa9a7f8e1e05749da69049c22db29c
--- /dev/null
+++ b/examples/car_cartoon/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2f31141552cb72b4a5e7b204926d3c6cd7e36651edc2924a3d5d8e10437694e
+size 93554
diff --git a/examples/car_cartoon/src.png b/examples/car_cartoon/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..242c60181c683b7705b74026fc69534ed789c509
--- /dev/null
+++ b/examples/car_cartoon/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dbdbdfe282d8f232caf66c6d84f9116639bcc50b8e70b2cde2a0f352fdccfe
+size 102456
diff --git a/examples/car_cartoon/src_mv.png b/examples/car_cartoon/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69972374bec1d6e783771d935fea1cc3ee4fd8bf
--- /dev/null
+++ b/examples/car_cartoon/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bf471ec55d12cac52b73620503439b3910fc2e31c8f7569d28c80beb57b1bb
+size 183102
diff --git a/examples/car_engine/edited.png b/examples/car_engine/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..884ce8c832896fda3308698e742055d956282308
--- /dev/null
+++ b/examples/car_engine/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d506435b214bb037f0c65312b8c29b356c79c31db7abc1c5e9776c38a1890fca
+size 254825
diff --git a/examples/car_engine/src.png b/examples/car_engine/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..242c60181c683b7705b74026fc69534ed789c509
--- /dev/null
+++ b/examples/car_engine/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dbdbdfe282d8f232caf66c6d84f9116639bcc50b8e70b2cde2a0f352fdccfe
+size 102456
diff --git a/examples/car_engine/src_mv.png b/examples/car_engine/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69972374bec1d6e783771d935fea1cc3ee4fd8bf
--- /dev/null
+++ b/examples/car_engine/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bf471ec55d12cac52b73620503439b3910fc2e31c8f7569d28c80beb57b1bb
+size 183102
diff --git a/examples/car_steampunk/edited.png b/examples/car_steampunk/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..372e6cf1bc4e36061e02e03e4e973fc35a99fb98
--- /dev/null
+++ b/examples/car_steampunk/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f1f047c038f82081928c1c008b71529fe683bdbedf23814729f07156bb04405
+size 94126
diff --git a/examples/car_steampunk/src.png b/examples/car_steampunk/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..242c60181c683b7705b74026fc69534ed789c509
--- /dev/null
+++ b/examples/car_steampunk/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dbdbdfe282d8f232caf66c6d84f9116639bcc50b8e70b2cde2a0f352fdccfe
+size 102456
diff --git a/examples/car_steampunk/src_mv.png b/examples/car_steampunk/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69972374bec1d6e783771d935fea1cc3ee4fd8bf
--- /dev/null
+++ b/examples/car_steampunk/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bf471ec55d12cac52b73620503439b3910fc2e31c8f7569d28c80beb57b1bb
+size 183102
diff --git a/examples/deer_pixar/edited.png b/examples/deer_pixar/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..8f7c748b007b86e03d40df13931167bfe683c2af
--- /dev/null
+++ b/examples/deer_pixar/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3851883c3f4985b580179430dd59ecabb2ab307d2fb2fac9b1d098ab8335c3d9
+size 124923
diff --git a/examples/deer_pixar/src.png b/examples/deer_pixar/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..4ded9777d65029234e51fd1e959a4b652bb8ee57
--- /dev/null
+++ b/examples/deer_pixar/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4283b7e23c6a7173982dd16f6943aaa3fc1367b0b20a4736c5b4b4ffeee0a1d5
+size 106593
diff --git a/examples/deer_pixar/src_mv.png b/examples/deer_pixar/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..42d97bb41764280f8e6f01a0f6c88c2ab252cb26
--- /dev/null
+++ b/examples/deer_pixar/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:402f558bf62ec8a269997bba9a5dc783d9ab8fafd963da897681b8b1494f6634
+size 215417
diff --git a/examples/deer_wings/edited.png b/examples/deer_wings/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ec01357c4034b300cab66f57228d438b1f4fdf54
--- /dev/null
+++ b/examples/deer_wings/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5a7f916061401398a1af50bf95e2c6d85195ae9869bf32ed7853d75edcb6566
+size 440500
diff --git a/examples/deer_wings/output/expected.png b/examples/deer_wings/output/expected.png
new file mode 100644
index 0000000000000000000000000000000000000000..c2d7c7f21c573e28f36d245346ad2fecba8e12e7
--- /dev/null
+++ b/examples/deer_wings/output/expected.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b12c9a3b96215d667329d4909e6d1bf5f3ee785be3781f10aae0fa930e4cb62
+size 556132
diff --git a/examples/deer_wings/src.png b/examples/deer_wings/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..4ded9777d65029234e51fd1e959a4b652bb8ee57
--- /dev/null
+++ b/examples/deer_wings/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4283b7e23c6a7173982dd16f6943aaa3fc1367b0b20a4736c5b4b4ffeee0a1d5
+size 106593
diff --git a/examples/deer_wings/src_mv.png b/examples/deer_wings/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..42d97bb41764280f8e6f01a0f6c88c2ab252cb26
--- /dev/null
+++ b/examples/deer_wings/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:402f558bf62ec8a269997bba9a5dc783d9ab8fafd963da897681b8b1494f6634
+size 215417
diff --git a/examples/desk_wizard/edited.png b/examples/desk_wizard/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..4fce7eb49ccf8bce0f0778ac6dd54d8652d2b3f9
--- /dev/null
+++ b/examples/desk_wizard/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b100e3edafef41e5414531e7f76b1391caca1f2d4de9685af0173c4b90f8c0d
+size 185536
diff --git a/examples/desk_wizard/src.png b/examples/desk_wizard/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..0eeacde0d3b28f2ae21014aea4409d6e5ed3df95
--- /dev/null
+++ b/examples/desk_wizard/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beed78eccce5a36a39038ff7d21ced98b350104ef169bb721ecfcdff29aef400
+size 202095
diff --git a/examples/desk_wizard/src_mv.png b/examples/desk_wizard/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..588fde4139229d5570200b6782a6408265b70ec9
--- /dev/null
+++ b/examples/desk_wizard/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88b8c9f1a4f84b29da0990d22ee2bb1a3c4449b607ceb87783b31f9ec8e9d7d4
+size 424371
diff --git a/examples/figure_backpack/edited.png b/examples/figure_backpack/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..6de12f814f8101323831b87ec10e979fbc48ca68
--- /dev/null
+++ b/examples/figure_backpack/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12a43505bed404a07dfdde643bd81a4fe38c252c1ff530220ff05ab296fbe8ff
+size 281040
diff --git a/examples/figure_backpack/src.png b/examples/figure_backpack/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5e77fa42bbe939067c9baa054809d828f8b4defe
--- /dev/null
+++ b/examples/figure_backpack/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57c0dbfffe6e3d1a6cfc052b90bf5f5bb97cd70b88550bae77e074213ba85eb5
+size 105256
diff --git a/examples/figure_backpack/src_mv.png b/examples/figure_backpack/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..a67870bb8659246d346817b499fd64f52de0ae35
--- /dev/null
+++ b/examples/figure_backpack/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1c207dcf2e859f679828925c30760b21fb4a370f724f4c07bf005736e21410
+size 188907
diff --git a/examples/figure_zombie/edited.png b/examples/figure_zombie/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..f384768b56ec97e5fe1726cd8a39994cc99571b6
--- /dev/null
+++ b/examples/figure_zombie/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae4b387a3e566dccf7719d8f69d4580278e1e06819cd97fa3926bf47c0eacb84
+size 265916
diff --git a/examples/figure_zombie/src.png b/examples/figure_zombie/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbcbc0eb00d68a908783ce2ffab4ecfc0cd0c514
--- /dev/null
+++ b/examples/figure_zombie/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea5358bfb25731dd565ff9326680d73fa61fb2144a1b1a58f891e3d03dacac9a
+size 109657
diff --git a/examples/figure_zombie/src_mv.png b/examples/figure_zombie/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..92480d9cbc4499cd6f2feae70a05fafd371c92e5
--- /dev/null
+++ b/examples/figure_zombie/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8241b6618e97e27f8abb40157dc39c038c8576a56a4300990eef85dd93ee8bd
+size 189195
diff --git a/examples/fox_eyes/edited.png b/examples/fox_eyes/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a02d3c3bf8ef79bf7d4d24d1dc4bdbca9e859fb2
--- /dev/null
+++ b/examples/fox_eyes/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ddca6411e9231de47e0492fb5f840abf0afdfec610f045e375a1864c13a8849
+size 220288
diff --git a/examples/fox_eyes/src.png b/examples/fox_eyes/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..00873d1c5ddac03d0d3d017c28b26d6e191233a5
--- /dev/null
+++ b/examples/fox_eyes/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cb23e9ccbea032367aa721444814d5772f27c3bf31cc0814a6159ec71dd5fa9
+size 129432
diff --git a/examples/fox_eyes/src_mv.png b/examples/fox_eyes/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..789ac505a31a85d157b3d35005848f30684e47a3
--- /dev/null
+++ b/examples/fox_eyes/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5d086775b1bba7851c4e6867cf95518024f9abe2df7290694c74f244da5e6aa
+size 202552
diff --git a/examples/fox_tuxedo/edited.png b/examples/fox_tuxedo/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a02d3c3bf8ef79bf7d4d24d1dc4bdbca9e859fb2
--- /dev/null
+++ b/examples/fox_tuxedo/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ddca6411e9231de47e0492fb5f840abf0afdfec610f045e375a1864c13a8849
+size 220288
diff --git a/examples/fox_tuxedo/src.png b/examples/fox_tuxedo/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..00873d1c5ddac03d0d3d017c28b26d6e191233a5
--- /dev/null
+++ b/examples/fox_tuxedo/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cb23e9ccbea032367aa721444814d5772f27c3bf31cc0814a6159ec71dd5fa9
+size 129432
diff --git a/examples/fox_tuxedo/src_mv.png b/examples/fox_tuxedo/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..789ac505a31a85d157b3d35005848f30684e47a3
--- /dev/null
+++ b/examples/fox_tuxedo/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5d086775b1bba7851c4e6867cf95518024f9abe2df7290694c74f244da5e6aa
+size 202552
diff --git a/examples/gazebo_disney/edited.png b/examples/gazebo_disney/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..83027f11c67ed6e9802b5f29fb9c4f20e91637a3
--- /dev/null
+++ b/examples/gazebo_disney/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7ac343b7a29a30a8c18ff8d532f8faf484edfc038596584b168be81f6c48a0a
+size 208864
diff --git a/examples/gazebo_disney/src.png b/examples/gazebo_disney/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_disney/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_disney/src_mv.png b/examples/gazebo_disney/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_disney/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_light/edited.png b/examples/gazebo_light/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a249a7e75b051e9eb737935ab3f76c5082cd7bf7
--- /dev/null
+++ b/examples/gazebo_light/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e249be567fa40011a4a182b342e5279e8ac07dabf580f09e23d0ead7168980
+size 227478
diff --git a/examples/gazebo_light/src.png b/examples/gazebo_light/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_light/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_light/src_mv.png b/examples/gazebo_light/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_light/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_pagoda/edited.png b/examples/gazebo_pagoda/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf8c54a3f7de0ac7a32201328f8287d9ded4db59
--- /dev/null
+++ b/examples/gazebo_pagoda/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42445e7493b114bd187da22176ce4f7197da4e629b2d13c7b0e3470c00f30a9e
+size 640285
diff --git a/examples/gazebo_pagoda/src.png b/examples/gazebo_pagoda/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_pagoda/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_pagoda/src_mv.png b/examples/gazebo_pagoda/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_pagoda/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_roof/edited.png b/examples/gazebo_roof/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..22c8e34ae73b0730b88eb304ac7d6129efc1db10
--- /dev/null
+++ b/examples/gazebo_roof/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df85b8c94f29f514bfe07d0fb6705584c7c1b5b1c69fbe8255c027429b4ccab
+size 471945
diff --git a/examples/gazebo_roof/src.png b/examples/gazebo_roof/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_roof/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_roof/src_mv.png b/examples/gazebo_roof/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_roof/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_rust/edited.png b/examples/gazebo_rust/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..c6bc296a9196f6aac7df688084c15c32b25417f6
--- /dev/null
+++ b/examples/gazebo_rust/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a54def20e1899cdf42f83375d0f45bcf2c66710e76038a1e7f902117b9fc2f6e
+size 234300
diff --git a/examples/gazebo_rust/src.png b/examples/gazebo_rust/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_rust/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_rust/src_mv.png b/examples/gazebo_rust/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_rust/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/german-shep_pixar/edited.png b/examples/german-shep_pixar/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..fcc857306304761aaeb2bab50f7ce5d0a2d7c5b0
--- /dev/null
+++ b/examples/german-shep_pixar/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:536b7efc741d1800a8f1e4d293c9673051e9125e20464c28d4821da3fa35cbdc
+size 87295
diff --git a/examples/german-shep_pixar/src.png b/examples/german-shep_pixar/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..252cbfebfb89f45c66dabd334e4843ed9630c3f0
--- /dev/null
+++ b/examples/german-shep_pixar/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67f6b29caff5c68f8c8f177ebd71b54870bec69a1e2f2221b54f23cb5a321fb5
+size 105010
diff --git a/examples/german-shep_pixar/src_mv.png b/examples/german-shep_pixar/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..7e83f5b7848b3a3565ecb9fbcfd46c2e114bee38
--- /dev/null
+++ b/examples/german-shep_pixar/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce6937a6d716fc2920e60477de5d3bc46f361bc283e12abd4c2bd717cbe14356
+size 197799
diff --git a/examples/german-shep_plush/edited.png b/examples/german-shep_plush/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..f6c81cf38809e25b3100be4a5c15a3621a2691ae
--- /dev/null
+++ b/examples/german-shep_plush/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:041a871f13a42f277121dcd95544a8aa3f64dcec2d9372bfbebae5c419feaf75
+size 120424
diff --git a/examples/german-shep_plush/src.png b/examples/german-shep_plush/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..252cbfebfb89f45c66dabd334e4843ed9630c3f0
--- /dev/null
+++ b/examples/german-shep_plush/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67f6b29caff5c68f8c8f177ebd71b54870bec69a1e2f2221b54f23cb5a321fb5
+size 105010
diff --git a/examples/german-shep_plush/src_mv.png b/examples/german-shep_plush/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..7e83f5b7848b3a3565ecb9fbcfd46c2e114bee38
--- /dev/null
+++ b/examples/german-shep_plush/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce6937a6d716fc2920e60477de5d3bc46f361bc283e12abd4c2bd717cbe14356
+size 197799
diff --git a/examples/green-dragon_skirt/edited.png b/examples/green-dragon_skirt/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..d2128a7f490506f5b6415a73405ed57f31d7b587
--- /dev/null
+++ b/examples/green-dragon_skirt/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71a744084e3a388c1dd1c71abd9a9808d1a2bd1295ec89e4915176175f4c6010
+size 323566
diff --git a/examples/green-dragon_skirt/src.png b/examples/green-dragon_skirt/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..e2f102f2262d8172a0346903405d75aecd194624
--- /dev/null
+++ b/examples/green-dragon_skirt/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45e7272e980a9ed09065f7ab59c9b10a75b81f3ea5fcce7a90b075e1b5735d13
+size 157831
diff --git a/examples/green-dragon_skirt/src_mv.png b/examples/green-dragon_skirt/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..6aad0c36e278d23212656e26fc9170e73d5f0d49
--- /dev/null
+++ b/examples/green-dragon_skirt/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9169a7378a7ae079f392ccf5a15e440de0c93b35dd1ab4c732817ad4371c40e
+size 325560
diff --git a/examples/grogu_earphones/edited.png b/examples/grogu_earphones/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..d65a477bc3e60958f45e82ff509022b499d6878a
--- /dev/null
+++ b/examples/grogu_earphones/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f09740650379238901fc0eb63ade8a8ddc2b9a7cba0c929bff6d8dc17c202397
+size 761794
diff --git a/examples/grogu_earphones/src.png b/examples/grogu_earphones/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_earphones/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_earphones/src_mv.png b/examples/grogu_earphones/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_earphones/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/grogu_kimono/edited.png b/examples/grogu_kimono/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a8888c24b5a5d1187ce0d15aa9d435c9f6ca9c3c
--- /dev/null
+++ b/examples/grogu_kimono/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b80bbd8d906c0d92a3e56f7f962ae1790ef187c28c3676eef42a7b88dd49796
+size 541011
diff --git a/examples/grogu_kimono/src.png b/examples/grogu_kimono/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_kimono/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_kimono/src_mv.png b/examples/grogu_kimono/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_kimono/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/grogu_lego-fig/edited.png b/examples/grogu_lego-fig/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ec4f1387aa3e0bfc38d885a90c0611313990c87a
--- /dev/null
+++ b/examples/grogu_lego-fig/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63c9b367e5a01a8fcec4ab2cb8d3638fe70d012d3ebd07f0ef40769858fd1029
+size 278338
diff --git a/examples/grogu_lego-fig/src.png b/examples/grogu_lego-fig/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_lego-fig/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_lego-fig/src_mv.png b/examples/grogu_lego-fig/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_lego-fig/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/grogu_the-force/edited.png b/examples/grogu_the-force/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..9467dacad281beee48aa5407fb2ab435a1e8b105
--- /dev/null
+++ b/examples/grogu_the-force/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41d9b992b2dc5e5e0b7a58ca9a2acdcd3c2104504442bd3b47c1292d3c6f7af8
+size 710198
diff --git a/examples/grogu_the-force/src.png b/examples/grogu_the-force/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_the-force/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_the-force/src_mv.png b/examples/grogu_the-force/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_the-force/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/lego-car_spoiler/edited.png b/examples/lego-car_spoiler/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..c8b6dbed5f64c6f219900c2a0454b796c51b32e8
--- /dev/null
+++ b/examples/lego-car_spoiler/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d0356a19c21059941e58f4a4da071ca75c9ecec7311cfdac2e99b1672862da6
+size 314538
diff --git a/examples/lego-car_spoiler/src.png b/examples/lego-car_spoiler/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..0c86acf4508a666e09487093240e6ada436e4d35
--- /dev/null
+++ b/examples/lego-car_spoiler/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86e13c3942880537e9facb8b5361671398e70c7d95249a9735c30fccf877907a
+size 156501
diff --git a/examples/lego-car_spoiler/src_mv.png b/examples/lego-car_spoiler/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..aae7a927f1c1211e8137e699cdada1305af289c4
--- /dev/null
+++ b/examples/lego-car_spoiler/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12ee65894bb3ea07aa8850c94342b0ae059d7aed0c7bca5d51b046580b49510e
+size 256467
diff --git a/examples/nurse_sporty/edited.png b/examples/nurse_sporty/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..2c72cfc51f79b0f1867888d422960cf550bf9fd1
--- /dev/null
+++ b/examples/nurse_sporty/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d09b6a9bb40c97f0317c6fb1460bccf73bcf02dba72b91ba05bfc0fbd5ffcd0
+size 94325
diff --git a/examples/nurse_sporty/src.png b/examples/nurse_sporty/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..b806340b3528e4f587f69d046a23423597d9f78a
--- /dev/null
+++ b/examples/nurse_sporty/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a97558a8532d476b436a258240cb5ad1d9303efd945851ed69f897f2aa573030
+size 84455
diff --git a/examples/nurse_sporty/src_mv.png b/examples/nurse_sporty/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..0974376bbbbab36414c71fcd721a21d4b700de85
--- /dev/null
+++ b/examples/nurse_sporty/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97a9e32374ce4fab1e12063a78156a6bcd134488a68d2f9d92fb3e2703fde132
+size 146028
diff --git a/examples/oasis_magical/edited.png b/examples/oasis_magical/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..822dc1ec8c88731edd965541c29095705377bc6e
--- /dev/null
+++ b/examples/oasis_magical/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ca245181688fa4d777f95c93ab26c8c432bb7c1e49df17706828bd720161ee0
+size 198404
diff --git a/examples/oasis_magical/src.png b/examples/oasis_magical/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..33b03c8a72617369bd98a406b2c7f364746d6045
--- /dev/null
+++ b/examples/oasis_magical/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6de9c44ea90542208df5a24933ea2177201460a9fbd5e715ffa394715568d5fb
+size 220033
diff --git a/examples/oasis_magical/src_mv.png b/examples/oasis_magical/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ddd65dd68da26c1c9384111ded9ec0f4de346b8
--- /dev/null
+++ b/examples/oasis_magical/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:397b18085dcb8ffc4daa6b05554f1533f3c072602f4fc74ec6caf08303cd83f2
+size 340066
diff --git a/examples/r2d2_golden/edited.png b/examples/r2d2_golden/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5347a2ebe623f0614703160889f0354334a16a15
--- /dev/null
+++ b/examples/r2d2_golden/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a689f6d7f47b7b66205c40ba6fbed2d570f4cae712ecb7ca14fff199beb6bdd
+size 215874
diff --git a/examples/r2d2_golden/src.png b/examples/r2d2_golden/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..fb9d09162edc802d18713e96d3fcc19718983b1d
--- /dev/null
+++ b/examples/r2d2_golden/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfaefd28dc01be8b9b2b27a67979d2ce06812a75283ed2077cd219e045095c57
+size 231139
diff --git a/examples/r2d2_golden/src_mv.png b/examples/r2d2_golden/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..5cc933f06a1ccd835d8c54702397c98530bd4ed3
--- /dev/null
+++ b/examples/r2d2_golden/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c7ea59150542e22b01b34b643bb6cd28d50dc2b0701dcfc4c0c946303f023d3
+size 421552
diff --git a/examples/red-dragon_tail/edited.png b/examples/red-dragon_tail/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ada7e3739dde1f380a779034ff5277728e6b970
--- /dev/null
+++ b/examples/red-dragon_tail/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a599d457f4e1e901fd8eacf6e865b15a0227f7deddcf0ce7756e74b5809ec5
+size 282077
diff --git a/examples/red-dragon_tail/src.png b/examples/red-dragon_tail/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..a56934b2cde7172665f9b784ff0dca7c9762085d
--- /dev/null
+++ b/examples/red-dragon_tail/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f416ee4d117c2eb7741d8c99b606a688eaaa1fae4cf4c307cce031053c58a1f
+size 125444
diff --git a/examples/red-dragon_tail/src_mv.png b/examples/red-dragon_tail/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..aab5feffc89d8886ec0ce99de492cc7cd48227f6
--- /dev/null
+++ b/examples/red-dragon_tail/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c0e0be7dc35d10839871541d6f85b365e0c7da0e7b64065ac0955686eced885
+size 241876
diff --git a/examples/robot_sunglasses/edited.png b/examples/robot_sunglasses/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..0928c0c161324a36a00f68363f94fbb749d5f7ec
--- /dev/null
+++ b/examples/robot_sunglasses/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1baecfc567dcd06459a677c7d254fade2e73c9f52405abdaa42fb2665d5153eb
+size 439986
diff --git a/examples/robot_sunglasses/output/expected.png b/examples/robot_sunglasses/output/expected.png
new file mode 100644
index 0000000000000000000000000000000000000000..27934677ce93790e6750868a573dd2694795dcc1
--- /dev/null
+++ b/examples/robot_sunglasses/output/expected.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1024252ad60faf364ebc3de593c483dea358daf4b30b7664892cba4ced799ef
+size 485565
diff --git a/examples/robot_sunglasses/src.png b/examples/robot_sunglasses/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..7ce63f43896eb27bd200e062ef6dc386fbe2a2a8
--- /dev/null
+++ b/examples/robot_sunglasses/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61bb64b6314a7c36f08d87b7fc97f44b7b34b99a8635a907572319ef05ee5274
+size 215827
diff --git a/examples/robot_sunglasses/src_mv.png b/examples/robot_sunglasses/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..b338a757a412aaf9d3f265e9144c5644bb35fbd7
--- /dev/null
+++ b/examples/robot_sunglasses/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f8161aa18b17a26b31bf6c59f8b518107928af0220ba789c4405e6adf49a2d6
+size 387049
diff --git a/examples/ship_fantasy/edited.png b/examples/ship_fantasy/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..67eb897032ec2b5216ab9ae8bd811d49dbbb5271
--- /dev/null
+++ b/examples/ship_fantasy/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5d1e1fa480e8c2a6907d3e54ae2294abff64edf5e2bffb23d58aeee21c8c729
+size 203636
diff --git a/examples/ship_fantasy/src.png b/examples/ship_fantasy/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..aec97a75ad905ca012216c4eaca115510a725cee
--- /dev/null
+++ b/examples/ship_fantasy/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72dd2048af05effb5d12cb45322b91ae419a19ade7f8b738fa5c25f00201fa32
+size 197493
diff --git a/examples/ship_fantasy/src_mv.png b/examples/ship_fantasy/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..b02af274a2ce27ebf8a75e967228c4ded2e5b0dd
--- /dev/null
+++ b/examples/ship_fantasy/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1dddd093e479c05f86d12dd0991bbbe0aa8474eadf74fe4c008957e869cb6c2
+size 366609
diff --git a/examples/spiderbot_chrome/edited.png b/examples/spiderbot_chrome/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa225500eed933720b8b51ebe238d1b57aa90128
--- /dev/null
+++ b/examples/spiderbot_chrome/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:679c255f8a83a36a12bf51b09b2c858bdc36eac1fa0b042f7a2b980e66d62fa1
+size 177923
diff --git a/examples/spiderbot_chrome/src.png b/examples/spiderbot_chrome/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..c0e37031b187d3d73cb67f958d13f18699e98e52
--- /dev/null
+++ b/examples/spiderbot_chrome/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9171f3b894e12f8c9f5a821ee22a265e0189605f44639bf1d9d24fe1ce2b3596
+size 189699
diff --git a/examples/spiderbot_chrome/src_mv.png b/examples/spiderbot_chrome/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..13b663701bfe2876b321204c01695cde99a5a9a3
--- /dev/null
+++ b/examples/spiderbot_chrome/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:693d830ad2c15b7e7d34ce7a4669642c8a7635c8eb7aa83daafe66e29d1889f2
+size 363038
diff --git a/examples/spiderbot_steampunk/edited.png b/examples/spiderbot_steampunk/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..42a095b3c9a6bc194e0e8f2f82f52757092dae78
--- /dev/null
+++ b/examples/spiderbot_steampunk/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:466fa790fb3e8671e0981f3c3e6e2c4e141d2f14184aaa89563cae78087db236
+size 173473
diff --git a/examples/spiderbot_steampunk/src.png b/examples/spiderbot_steampunk/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..c0e37031b187d3d73cb67f958d13f18699e98e52
--- /dev/null
+++ b/examples/spiderbot_steampunk/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9171f3b894e12f8c9f5a821ee22a265e0189605f44639bf1d9d24fe1ce2b3596
+size 189699
diff --git a/examples/spiderbot_steampunk/src_mv.png b/examples/spiderbot_steampunk/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..13b663701bfe2876b321204c01695cde99a5a9a3
--- /dev/null
+++ b/examples/spiderbot_steampunk/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:693d830ad2c15b7e7d34ce7a4669642c8a7635c8eb7aa83daafe66e29d1889f2
+size 363038
diff --git a/examples/stormtrooper_donut/edited.png b/examples/stormtrooper_donut/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..890046731a9beeb6e18cd1e6d68e2f562d684e18
--- /dev/null
+++ b/examples/stormtrooper_donut/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25b618f0adb791ef54b3e8a1721703a17934bd07d1c37a7f3e77d0a0b96b181f
+size 234289
diff --git a/examples/stormtrooper_donut/src.png b/examples/stormtrooper_donut/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..2fa852ec04d2c269361b57d6f30239a7a1394438
--- /dev/null
+++ b/examples/stormtrooper_donut/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f301631774f0c6a471ceb0be1dc3395cab330034d5eb2cb7b4470ed9af03c414
+size 115832
diff --git a/examples/stormtrooper_donut/src_mv.png b/examples/stormtrooper_donut/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..63d254c766929d7e4a3ca0d5c729c06398121d85
--- /dev/null
+++ b/examples/stormtrooper_donut/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:389e58719d4054061a5d9437857a952ca76d81c72484fe8092c300c26034c7d6
+size 203964
diff --git a/examples/superman_crossed/edited.png b/examples/superman_crossed/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..e1c95f17feb986739a2e1367c407f49e6d272c76
--- /dev/null
+++ b/examples/superman_crossed/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6cff6b2d03937c94e86f9d2a767abcf35d353739375266d11f8ffc7864a4b77
+size 218512
diff --git a/examples/superman_crossed/src.png b/examples/superman_crossed/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..056e5942048104645162208b27a423162becd0d1
--- /dev/null
+++ b/examples/superman_crossed/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c02def0b51c7948e60ed2d15f39f27ba2ff6db1f25a466ba1708226fe420420
+size 113692
diff --git a/examples/superman_crossed/src_mv.png b/examples/superman_crossed/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..8fadc3b4c62ec39cf042064fdbfcb01b75737bd2
--- /dev/null
+++ b/examples/superman_crossed/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9001135f9c58eb2f5d5485d7994623b279bc075c02d4185c5f5233f7e585971
+size 184764
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..88fadec2914b79f46eb5b4e7eec0f9cca3a99a63
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+torch
+torchvision
+diffusers==0.30.1
+transformers
+accelerate
+pillow
+huggingface_hub
+numpy
+tqdm
+gradio
\ No newline at end of file
diff --git a/resources/mv-gallery/1/edit/0.png b/resources/mv-gallery/1/edit/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc9ce253c1f9cd270c2cb511e5908e821e82d1ca
--- /dev/null
+++ b/resources/mv-gallery/1/edit/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2bf89561c1d1a0b3edbf26c436f3dd465c6a606b516ae05760505464d221fa9
+size 45127
diff --git a/resources/mv-gallery/1/edit/1.png b/resources/mv-gallery/1/edit/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6eb65e9516178287cb67cc2b7159f13e1f1296dc
--- /dev/null
+++ b/resources/mv-gallery/1/edit/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20188d7a128cdc1fbf9127fa1bf84bbbed95bdf320fd6b51f49413d7e4b12861
+size 50163
diff --git a/resources/mv-gallery/1/edit/2.png b/resources/mv-gallery/1/edit/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..554b4d8325317ed5bdea68e2996294916c43b9a0
--- /dev/null
+++ b/resources/mv-gallery/1/edit/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ecd5ff28dccb9d4f9aff53fb84dc384750e1bfccd25bbc9b523f4efa89d19ad
+size 51846
diff --git a/resources/mv-gallery/1/edit/prompt.png b/resources/mv-gallery/1/edit/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..af8089b410c4714f97c38c76023cc7be54414884
--- /dev/null
+++ b/resources/mv-gallery/1/edit/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32ec575e7468054f8bd57f1db3a1a2831b99405887d4951d45011cb179644633
+size 40538
diff --git a/resources/mv-gallery/1/edit2/0.png b/resources/mv-gallery/1/edit2/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..83d6f61013a69341d6863756f545676453d6ecbb
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86d128a57586533fbb9df0d03d73ca8cd5e74e19056df11391286feeb25800d5
+size 56827
diff --git a/resources/mv-gallery/1/edit2/1.png b/resources/mv-gallery/1/edit2/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..1cc12f7c2194d1724a513af541411a4115435561
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29154ac07b0a441fcbb9bce5bd97919682e79567704e3c62e8895fa237559bf0
+size 56044
diff --git a/resources/mv-gallery/1/edit2/2.png b/resources/mv-gallery/1/edit2/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..58a6e12e57dd89f89a2fba14c2ae4c693ccb6c06
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef7cbab69b050b10a87de160009773a5c70e8e2318912672a9beaef7eeb1ee5e
+size 48090
diff --git a/resources/mv-gallery/1/edit2/prompt.png b/resources/mv-gallery/1/edit2/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..432b5b52257dac250234432004030c94affe91dc
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e08897c4137140d422d738399f262d9aeb8456f77b068375aaaa350b3bc912cc
+size 53369
diff --git a/resources/mv-gallery/1/src/0.png b/resources/mv-gallery/1/src/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..26d49181272e8ce82838d1e9d45fb5e18aac3434
--- /dev/null
+++ b/resources/mv-gallery/1/src/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5e6c50641e3a4648d9991375108fe70e08f279b16412a3629117ae28a47557b
+size 30347
diff --git a/resources/mv-gallery/1/src/1.png b/resources/mv-gallery/1/src/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..915d442d7c58ebe7b15bbf01ce42c97b96eb48bc
--- /dev/null
+++ b/resources/mv-gallery/1/src/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b672ff174e56f2f3376c0351cf86f2a3f302c931bb54aabc91b9c9b13c72ca0
+size 34623
diff --git a/resources/mv-gallery/1/src/2.png b/resources/mv-gallery/1/src/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..723bcbcc757373e09a58e3ebd6e5c7c05b8dc396
--- /dev/null
+++ b/resources/mv-gallery/1/src/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a189c0e2665b627a5520ca84b1b8be6e6d0e3444f8f9aa86b6cb376d689ae00
+size 34818
diff --git a/resources/mv-gallery/1/src/prompt.png b/resources/mv-gallery/1/src/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..00c566b332e528b4de40f9d2f4a8d5dd44fc4f40
--- /dev/null
+++ b/resources/mv-gallery/1/src/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0d5fa8487834b51ffa89fb42b825fedb55e9902f3c4509c460c6aeab2c4d0d7
+size 34392
diff --git a/resources/mv-gallery/2/edit/0.png b/resources/mv-gallery/2/edit/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..2a2896bd2e86e2687e81eec08d78748bd16d6e40
--- /dev/null
+++ b/resources/mv-gallery/2/edit/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7defdf226cfda007f9566572ad4f10edee8b806b97491323ece3a831f9a7962
+size 38885
diff --git a/resources/mv-gallery/2/edit/1.png b/resources/mv-gallery/2/edit/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..38d5d83734390591e4a7a7d0107ca961439a6192
--- /dev/null
+++ b/resources/mv-gallery/2/edit/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d27c761282a7130e85c30e08b3406e640a21186c0cbafe94f576ebbe062acf37
+size 43875
diff --git a/resources/mv-gallery/2/edit/2.png b/resources/mv-gallery/2/edit/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..43a345337e5493ee175f4e2e01406924794cda29
--- /dev/null
+++ b/resources/mv-gallery/2/edit/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:796bc56899c91f7c7bfa08c8c3278e2c8b5932a257c2737e1245b99d44416574
+size 32073
diff --git a/resources/mv-gallery/2/edit/prompt.png b/resources/mv-gallery/2/edit/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..cc5af485413e2f175a74352f9b9649ae1d4c9c67
--- /dev/null
+++ b/resources/mv-gallery/2/edit/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f194ba3af273b6f70fd59da0af7df80801c7add0406efb654c09e153cb091e8
+size 38996
diff --git a/resources/mv-gallery/2/edit2/0.png b/resources/mv-gallery/2/edit2/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..22ef4ee58d1ff361de837984503308a76101b092
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:984cb2a4301a7a75126071dd2e4a8a07b66401ac7035b15a7771f5a92f7ecf05
+size 36494
diff --git a/resources/mv-gallery/2/edit2/1.png b/resources/mv-gallery/2/edit2/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..fcabc9593db476657c496b9230cde1b23db36b76
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bd8d641de2b78ce883decdddfa744f14008421d7b9a597bca837225c04b1ca4
+size 42852
diff --git a/resources/mv-gallery/2/edit2/2.png b/resources/mv-gallery/2/edit2/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..7be7a7bccaf51eb2e1d6e20703356f0f74a5a72e
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f982d22a2f0fc154207fa5c8f5ad7d3c1abcd651a5686d71f9f388217bc5b97
+size 32548
diff --git a/resources/mv-gallery/2/edit2/prompt.png b/resources/mv-gallery/2/edit2/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3edd6c78ecb702bb1722f956e8f5a2af10d1c61
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbdde3c1562dc3406dd4ce50a61a966b4817178d907b9ca5c4ffd32d9366495c
+size 33971
diff --git a/resources/mv-gallery/2/src/0.png b/resources/mv-gallery/2/src/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..11a2c348ff4e8f76d1a31b7e0936e7709d83bf87
--- /dev/null
+++ b/resources/mv-gallery/2/src/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8204713b64298624d1c703288c2342f0f25f4bc44b330f5fcbddf6fe39b15bb
+size 27094
diff --git a/resources/mv-gallery/2/src/1.png b/resources/mv-gallery/2/src/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..11a2c348ff4e8f76d1a31b7e0936e7709d83bf87
--- /dev/null
+++ b/resources/mv-gallery/2/src/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8204713b64298624d1c703288c2342f0f25f4bc44b330f5fcbddf6fe39b15bb
+size 27094
diff --git a/resources/mv-gallery/2/src/2.png b/resources/mv-gallery/2/src/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa75c36bae555d5bfdf98fd41e2c66e63ea8fe81
--- /dev/null
+++ b/resources/mv-gallery/2/src/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dd5c079793b69790c889dd3968ed607b2eb292d5f671d0d3358c8c7e0153b65
+size 30177
diff --git a/resources/mv-gallery/2/src/prompt.png b/resources/mv-gallery/2/src/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..46f29a29751c9be35466105df7c56e20532a9fff
--- /dev/null
+++ b/resources/mv-gallery/2/src/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:338c3769e7e5661c7c6fbfbbb070ff964dfc2dd3bbc966760f40260e6cbb8c31
+size 34068
diff --git a/scripts/img2mv.py b/scripts/img2mv.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f14f54dcb615399223084663a376ec2f5e08469
--- /dev/null
+++ b/scripts/img2mv.py
@@ -0,0 +1,97 @@
+import argparse
+import sys
+from pathlib import Path
+from typing import Optional
+
+# --- Start of the "Messy" but Effective Path Setup ---
+# This block ensures that imports work correctly without modifying the src directory.
+# It adds both the project root and the src directory to the Python path.
+try:
+ # Get the project root directory (which is the parent of the 'scripts' directory)
+ project_root = Path(__file__).resolve().parent.parent
+ # Get the source code directory
+ src_dir = project_root / "src"
+
+ # Add both directories to the system path
+ sys.path.insert(0, str(project_root))
+ sys.path.insert(0, str(src_dir))
+except IndexError:
+ # Fallback for when the script is run in a way that __file__ is not defined
+ print("Could not determine project root. Please run from the 'scripts' directory.")
+ sys.exit(1)
+# --- End of Path Setup ---
+
+import torch
+from PIL import Image
+
+from pipeline import Zero123PlusPipeline # This now works because src/ is on the path
+from utils import add_white_bg, load_z123_pipe
+
+
+def generate_from_single_view(
+ input_path: Path,
+ output_path: Path,
+ device_number: int = 0,
+ pipeline: Optional[Zero123PlusPipeline] = None,
+) -> None:
+ """
+ Generates a multi-view image grid from a single input image.
+
+ Args:
+ input_path: Path to the single input image.
+ output_path: Path to save the generated multi-view .png file.
+ device_number: The GPU device number to use.
+ pipeline: An optional pre-loaded pipeline instance.
+ """
+ if not input_path.is_file():
+ raise FileNotFoundError(f"Input image not found at: {input_path}")
+
+ print(f"Loading pipeline on device {device_number}...")
+ if pipeline is None:
+ pipeline = load_z123_pipe(device_number)
+
+ print(f"Processing input image: {input_path}")
+ cond_image = Image.open(input_path)
+ cond_image = add_white_bg(cond_image)
+
+ print("Generating multi-view grid...")
+ result = pipeline(cond_image, num_inference_steps=75).images[0]
+
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+ result.save(output_path)
+ print(f"Successfully saved multi-view grid to: {output_path}")
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ description="Generate a multi-view image grid from a single input view using Zero123++."
+ )
+ parser.add_argument(
+ "--input_image",
+ type=Path,
+ required=True,
+ help="Path to the single input image file (e.g., examples/robot_sunglasses/src.png)."
+ )
+ parser.add_argument(
+ "--output_path",
+ type=Path,
+ required=True,
+ help="Path to save the output multi-view grid (e.g., examples/robot_sunglasses/src_mv.png)."
+ )
+ parser.add_argument(
+ "--device_number",
+ type=int,
+ default=0,
+ help="GPU device number to use for generation."
+ )
+ args = parser.parse_args()
+
+ try:
+ generate_from_single_view(
+ input_path=args.input_image,
+ output_path=args.output_path,
+ device_number=args.device_number
+ )
+ except Exception as e:
+ print(f"An error occurred: {e}")
+ sys.exit(1)
diff --git a/scripts/recon.py b/scripts/recon.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bdf641aaec6bbd1be0e19fcbd79bde956d57f7d
--- /dev/null
+++ b/scripts/recon.py
@@ -0,0 +1,219 @@
+import os
+import argparse
+import numpy as np
+import torch
+from PIL import Image
+from torchvision.transforms import v2
+from omegaconf import OmegaConf
+from einops import rearrange
+from tqdm import tqdm
+from huggingface_hub import hf_hub_download
+import sys
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+submodule_path = os.path.join(script_dir, "..", "external", "instant-mesh")
+sys.path.insert(0, submodule_path)
+
+from src.utils.camera_util import (
+ get_circular_camera_poses,
+ get_zero123plus_input_cameras,
+ FOV_to_intrinsics,
+)
+from src.utils.train_util import instantiate_from_config
+from src.utils.mesh_util import save_obj
+from src.utils.infer_util import save_video
+
+
+def get_render_cameras(
+ batch_size=1, M=120, radius=4.0, elevation=20.0, is_flexicubes=False
+):
+ c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
+ if is_flexicubes:
+ cameras = torch.linalg.inv(c2ws)
+ cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
+ else:
+ extrinsics = c2ws.flatten(-2)
+ intrinsics = (
+ FOV_to_intrinsics(30.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
+ )
+ cameras = torch.cat([extrinsics, intrinsics], dim=-1)
+ cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
+ return cameras
+
+
+def render_frames(
+ model, planes, render_cameras, render_size=512, chunk_size=1, is_flexicubes=False
+):
+ frames = []
+ for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
+ if is_flexicubes:
+ frame = model.forward_geometry(
+ planes, render_cameras[:, i : i + chunk_size], render_size=render_size
+ )["img"]
+ else:
+ frame = model.forward_synthesizer(
+ planes, render_cameras[:, i : i + chunk_size], render_size=render_size
+ )["images_rgb"]
+ frames.append(frame)
+ frames = torch.cat(frames, dim=1)[0]
+ return frames
+
+def main(args):
+ """
+ Main function to run the 3D mesh generation process.
+ """
+ # ============================
+ # CONFIG
+ # ============================
+ print("š Starting 3D mesh generation...")
+ config = OmegaConf.load(args.config)
+ config_name = os.path.basename(args.config).replace(".yaml", "")
+ model_config = config.model_config
+ infer_config = config.infer_config
+ IS_FLEXICUBES = config_name.startswith("instant-mesh")
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # ============================
+ # SETUP OUTPUT DIRECTORY
+ # ============================
+ os.makedirs(args.output_dir, exist_ok=True)
+ base_name = os.path.splitext(os.path.basename(args.input_file))[0]
+ mesh_path = os.path.join(args.output_dir, "recon.obj")
+ video_path = os.path.join(args.output_dir, "recon.mp4")
+
+ # ============================
+ # LOAD RECONSTRUCTION MODEL
+ # ============================
+ print("Loading reconstruction model...")
+ model = instantiate_from_config(model_config)
+
+ # Download model checkpoint if it doesn't exist
+ model_ckpt_path = (
+ infer_config.model_path
+ if os.path.exists(infer_config.model_path)
+ else hf_hub_download(
+ repo_id="TencentARC/InstantMesh",
+ filename=f"{config_name.replace('-', '_')}.ckpt",
+ repo_type="model",
+ )
+ )
+
+ # Load the state dictionary
+ state_dict = torch.load(model_ckpt_path, map_location="cpu")["state_dict"]
+ state_dict = {
+ k[14:]: v for k, v in state_dict.items() if k.startswith("lrm_generator.")
+ }
+ model.load_state_dict(state_dict, strict=True)
+ model = model.to(device).eval()
+
+ if IS_FLEXICUBES:
+ model.init_flexicubes_geometry(device, fovy=30.0)
+
+ # ============================
+ # PREPARE DATA
+ # ============================
+ print(f"Processing input file: {args.input_file}")
+
+ # Load and preprocess the input image
+ input_image = Image.open(args.input_file).convert("RGB")
+ images = np.asarray(input_image, dtype=np.float32) / 255.0
+ images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
+ # Rearrange from (C, H, W) to (B, C, H, W) where B is the number of views
+ images = rearrange(images, "c (n h) (m w) -> (n m) c h w", n=3, m=2)
+ images = images.unsqueeze(0).to(device)
+ images = v2.functional.resize(images, size=320, interpolation=3, antialias=True).clamp(0, 1)
+
+ input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0 * args.scale).to(device)
+
+ # ============================
+ # RUN INFERENCE AND SAVE OUTPUT
+ # ============================
+ with torch.no_grad():
+ # Generate 3D mesh
+ planes = model.forward_planes(images, input_cameras)
+ mesh_out = model.extract_mesh(planes, use_texture_map=False, **infer_config)
+
+ # Save the mesh
+ vertices, faces, vertex_colors = mesh_out
+ save_obj(vertices, faces, vertex_colors, mesh_path)
+ print(f"ā
Mesh saved to {mesh_path}")
+
+ # Render and save video if enabled
+ if args.save_video:
+ print("š„ Rendering video...")
+ render_size = infer_config.render_resolution
+ chunk_size = 20 if IS_FLEXICUBES else 1
+ render_cameras = get_render_cameras(
+ batch_size=1,
+ M=120,
+ radius=args.distance,
+ elevation=20.0,
+ is_flexicubes=IS_FLEXICUBES,
+ ).to(device)
+
+ frames = render_frames(
+ model=model,
+ planes=planes,
+ render_cameras=render_cameras,
+ render_size=render_size,
+ chunk_size=chunk_size,
+ is_flexicubes=IS_FLEXICUBES,
+ )
+ save_video(frames, video_path, fps=30)
+ print(f"ā
Video saved to {video_path}")
+
+ print("⨠Process complete.")
+
+if __name__ == "__main__":
+ # ============================
+ # SCRIPT ARGUMENTS
+ # ============================
+ parser = argparse.ArgumentParser(
+ description="Generate a 3D mesh and video from a single multi-view PNG file."
+ )
+
+ # Positional argument for config file
+ parser.add_argument(
+ "config",
+ type=str,
+ help="Path to the model config file (.yaml)."
+ )
+
+ # Required file paths
+ parser.add_argument(
+ "--input_file",
+ type=str,
+ required=True,
+ help="Path to the input PNG file."
+ )
+ parser.add_argument(
+ "--output_dir",
+ type=str,
+ default="outputs/",
+ help="Directory to save the output .obj and .mp4 files. Defaults to 'outputs/'."
+ )
+
+ # Optional parameters for model and rendering
+ parser.add_argument(
+ "--scale",
+ type=float,
+ default=1.0,
+ help="Scale of the input cameras."
+ )
+ parser.add_argument(
+ "--distance",
+ type=float,
+ default=4.5,
+ help="Camera distance for rendering the output video."
+ )
+ parser.add_argument(
+ "--no_video",
+ dest="save_video",
+ action="store_false",
+ help="If set, disables saving the output .mp4 video."
+ )
+
+ parsed_args = parser.parse_args()
+ main(parsed_args)
diff --git a/scripts/render_mesh.py b/scripts/render_mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..26dd7999249c35464cde9e26a83be5bddadb6eca
--- /dev/null
+++ b/scripts/render_mesh.py
@@ -0,0 +1,388 @@
+# This script is borrowed from https://github.com/allenai/objaverse-rendering
+import argparse
+import math
+import os
+from pathlib import Path
+import shutil
+from typing import Dict, Literal, Tuple
+
+import bpy
+from mathutils import Vector
+from PIL import Image
+
+
+
+# --- Blender Setup Functions ---
def global_settings():
    """Apply the fixed Cycles render configuration to the current scene.

    Sets a 512x512 RGBA PNG output, the Cycles sampling/bounce options
    listed below, and a transparent film.

    Returns:
        The scene taken from ``bpy.context`` after it has been configured.
    """
    scene = bpy.context.scene
    output = scene.render

    # Output format and resolution.
    output.engine = "CYCLES"
    output.image_settings.file_format = "PNG"
    output.image_settings.color_mode = "RGBA"
    output.resolution_x = 512
    output.resolution_y = 512
    output.resolution_percentage = 100

    # Cycles options, applied in the listed order.
    cycles_options = {
        "device": "GPU",
        "samples": 32,
        "diffuse_bounces": 1,
        "glossy_bounces": 1,
        "transparent_max_bounces": 3,
        "transmission_bounces": 3,
        "filter_width": 0.01,
        "use_denoising": True,
    }
    for option, value in cycles_options.items():
        setattr(scene.cycles, option, value)

    output.film_transparent = True
    return scene
+
+
def add_lighting() -> None:
    """Replace the default light with a large area light and a fill light.

    The default object named "Light" (if present) is removed directly by
    reference, so unrelated objects that happen to be selected are not
    deleted along with it. Each new light is configured through the object
    the add operator leaves active rather than through a hard-coded
    datablock name, so it still works when the name "Area" is already taken.
    """
    # Remove only the default light; bpy.ops.object.delete() would delete
    # every currently selected object.
    default_light = bpy.data.objects.get("Light")
    if default_light is not None:
        bpy.data.objects.remove(default_light, do_unlink=True)

    # Large area light.
    bpy.ops.object.light_add(type="AREA")
    key_obj = bpy.context.active_object
    key_obj.data.energy = 30000
    key_obj.location[2] = 0.5
    key_obj.scale = (100, 100, 100)

    # Fill light.
    bpy.ops.object.light_add(type="AREA", location=(0, 0, 2))
    fill_obj = bpy.context.active_object
    fill_obj.data.energy = 2000
    fill_obj.scale = (10, 10, 10)
+
+
def reset_scene() -> None:
    """Reset the scene to a clean state by deleting all objects and data.

    Deletes every object, then removes all meshes, materials, textures,
    images, lights, cameras, curves and worlds from ``bpy.data``. Finally a
    fresh world named "World" is created and assigned to the current scene.
    """
    # Delete all objects.
    bpy.ops.object.select_all(action='SELECT')
    bpy.ops.object.delete()

    data = bpy.data
    datablock_collections = (
        data.meshes,
        data.materials,
        data.textures,
        data.images,
        data.lights,
        data.cameras,
        data.curves,
        data.worlds,
    )
    for collection in datablock_collections:
        # Snapshot first: removing entries from a collection while iterating
        # over that same collection is unsafe.
        for block in list(collection):
            collection.remove(block, do_unlink=True)

    # Create a new default world.
    bpy.context.scene.world = bpy.data.worlds.new("World")
    bpy.context.view_layer.update()
+
+
def load_object(object_path: str) -> None:
    """Import a 3D model into the current scene based on its file extension.

    The extension is matched case-insensitively. ``.glb`` and ``.gltf``
    files go through the glTF importer; ``.fbx`` files go through the FBX
    importer.

    Args:
        object_path: Path to the model file.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    lowered = object_path.lower()
    if lowered.endswith((".glb", ".gltf")):
        bpy.ops.import_scene.gltf(filepath=object_path, merge_vertices=True)
    elif lowered.endswith(".fbx"):
        bpy.ops.import_scene.fbx(filepath=object_path)
    else:
        raise ValueError(f"Unsupported file type: {object_path}")
+
+
+# --- Scene Normalization and Utility Functions ---
def scene_bbox(single_obj=None, ignore_matrix=False):
    """Return the axis-aligned bounding box as ``(min, max)`` Vectors.

    Args:
        single_obj: When given, only this object's ``bound_box`` is used;
            otherwise every object yielded by ``scene_meshes()`` contributes.
        ignore_matrix: When true, the bounding-box corners are used as-is;
            otherwise each corner is first multiplied by ``matrix_world``.

    Raises:
        RuntimeError: If there was no object to measure.
    """
    targets = [single_obj] if single_obj is not None else scene_meshes()

    lower = (math.inf,) * 3
    upper = (-math.inf,) * 3
    saw_object = False
    for obj in targets:
        saw_object = True
        for raw_corner in obj.bound_box:
            corner = Vector(raw_corner)
            if not ignore_matrix:
                corner = obj.matrix_world @ corner
            lower = tuple(min(cur, new) for cur, new in zip(lower, corner))
            upper = tuple(max(cur, new) for cur, new in zip(upper, corner))

    if not saw_object:
        raise RuntimeError("No objects in scene to compute bounding box for")
    return Vector(lower), Vector(upper)
+
+
def scene_root_objects():
    """Yield every object of the current scene that has no parent."""
    yield from (
        candidate
        for candidate in bpy.context.scene.objects.values()
        if not candidate.parent
    )
+
+
def scene_meshes():
    """Yield every object of the current scene whose data is a Mesh."""
    yield from (
        candidate
        for candidate in bpy.context.scene.objects.values()
        if isinstance(candidate.data, bpy.types.Mesh)
    )
+
+
def normalize_scene(target_scale=1.0):
    """Rescale the scene so its longest side is ``target_scale``, then center it.

    Scaling and centering are applied to the root objects only. The view
    layer is updated after each stage so the second bounding-box query sees
    the rescaled scene.

    Raises:
        ValueError: If the scene bounding box has zero size on every axis.
    """
    lo, hi = scene_bbox()
    extent = hi - lo
    longest_side = max(extent.x, extent.y, extent.z)
    if longest_side == 0:
        raise ValueError("Model has zero size. Cannot normalize.")

    # Stage 1: uniform rescale.
    factor = target_scale / longest_side
    for root in scene_root_objects():
        root.scale = root.scale * factor
    bpy.context.view_layer.update()

    # Stage 2: move the bounding-box center to the origin.
    lo, hi = scene_bbox()
    midpoint = (lo + hi) * 0.5
    for root in scene_root_objects():
        root.location -= midpoint
    bpy.context.view_layer.update()
+
+
+# --- Camera and Lighting Setup ---
def setup_camera(scene):
    """Configure the object named "Camera" and attach a TRACK_TO constraint.

    Args:
        scene: Scene whose ``objects`` mapping contains a "Camera" entry.

    Returns:
        ``(camera, constraint)``. The constraint's target is left unset for
        the caller to assign.
    """
    camera = scene.objects["Camera"]
    camera.location = (0, 1.2, 0)

    optics = camera.data
    optics.lens = 35
    optics.sensor_width = 32

    tracker = camera.constraints.new(type="TRACK_TO")
    tracker.track_axis = "TRACK_NEGATIVE_Z"
    tracker.up_axis = "UP_Y"
    return camera, tracker
+
+
def _create_light(
    name: str,
    light_type: Literal["POINT", "SUN", "SPOT", "AREA"],
    location: Tuple[float, float, float],
    rotation: Tuple[float, float, float],
    energy: float,
    use_shadow: bool = False,
    specular_factor: float = 1.0,
) -> bpy.types.Object:
    """Create a light, link it into the current collection and return it.

    Args:
        name: Name used for both the light datablock and its object.
        light_type: Blender light type.
        location: Object location.
        rotation: Euler rotation assigned to ``rotation_euler``.
        energy: Light energy.
        use_shadow: Whether the light casts shadows.
        specular_factor: Specular multiplier of the light.

    Returns:
        The newly created light object.
    """
    datablock = bpy.data.lights.new(name=name, type=light_type)
    light = bpy.data.objects.new(name, datablock)
    bpy.context.collection.objects.link(light)

    # Placement lives on the object ...
    light.location = location
    light.rotation_euler = rotation

    # ... while the emission properties live on the datablock.
    datablock.energy = energy
    datablock.use_shadow = use_shadow
    datablock.specular_factor = specular_factor
    return light
+
+
def create_lighting() -> Dict[str, bpy.types.Object]:
    """Replace all lights with a fixed rig of four sun lights.

    Existing light objects are selected by type and deleted first. The four
    suns are then created in the order key, fill, rim, bottom, all located
    at the origin.

    Returns:
        The created light objects keyed by ``"key_light"``, ``"fill_light"``,
        ``"rim_light"`` and ``"bottom_light"``.
    """
    # Remove existing lights.
    bpy.ops.object.select_all(action="DESELECT")
    bpy.ops.object.select_by_type(type="LIGHT")
    bpy.ops.object.delete()

    # (result key, object name, euler rotation in radians, energy)
    sun_specs = (
        ("key_light", "Key_Light", (0.785398, 0, -0.785398), 0.5),  # 45°, -45°
        ("fill_light", "Fill_Light", (0.785398, 0, 2.35619), 0.3),  # 45°, 135°
        ("rim_light", "Rim_Light", (-0.785398, 0, -3.92699), 0.5),  # -45°, -225°
        ("bottom_light", "Bottom_Light", (3.14159, 0, 0), 0.2),  # 180° (from below)
    )
    return {
        key: _create_light(
            name=object_name,
            light_type="SUN",
            location=(0, 0, 0),
            rotation=euler,
            energy=strength,
        )
        for key, object_name, euler, strength in sun_specs
    }
+
+
+# --- Main Rendering and Image Processing Functions ---
def render_object(
    object_file: str,
    output_dir: str,
    camera_views=(
        (30, 30, 1.5),
        (90, -20, 1.5),
        (150, 30, 1.5),
        (210, -20, 1.5),
        (270, 30, 1.5),
        (330, -20, 1.5),
    ),
    background_color=(255, 255, 255)
) -> None:
    """Render an object from several camera views into numbered PNG files.

    Args:
        object_file: Path of the model to load (see ``load_object``).
        output_dir: Directory that receives ``00.png``, ``01.png``, ...;
            created if missing.
        camera_views: Iterable of ``(azimuth_deg, elevation_deg, distance)``
            triples, one per rendered image. The default is an immutable
            tuple so it cannot be modified between calls.
        background_color: 0-255 RGB background color.
    """
    scene = global_settings()
    os.makedirs(output_dir, exist_ok=True)
    reset_scene()

    # Create and set up a new camera.
    bpy.ops.object.camera_add()
    camera = bpy.context.object
    camera.name = "Camera"
    # camera_add() already links the camera into the active collection;
    # linking an object twice into the same collection raises, so only link
    # when the scene collection does not hold it yet.
    if camera.name not in scene.collection.objects:
        scene.collection.objects.link(camera)
    scene.camera = camera

    scene.view_settings.view_transform = 'Standard'

    # Solid background: use the world color instead of a transparent film.
    world = bpy.data.worlds["World"]
    world.use_nodes = False
    world.color = tuple(channel / 255 for channel in background_color)
    scene.render.film_transparent = False
    scene.world = world

    # Load, normalize, and light the object.
    load_object(object_file)
    normalize_scene()
    create_lighting()
    cam, cam_constraint = setup_camera(scene)

    # Empty object for the camera to track.
    empty = bpy.data.objects.new("Empty", None)
    scene.collection.objects.link(empty)
    cam_constraint.target = empty

    for i, (azim, elev, camera_dist) in enumerate(camera_views):
        # Spherical to Cartesian: azimuth in the XY plane, elevation toward +Z.
        theta = math.radians(azim)
        phi = math.radians(elev)
        cam.location = (
            camera_dist * math.cos(phi) * math.cos(theta),
            camera_dist * math.cos(phi) * math.sin(theta),
            camera_dist * math.sin(phi),
        )

        # Render the image.
        scene.render.filepath = os.path.join(output_dir, f"{i:02d}.png")
        bpy.ops.render.render(write_still=True)
+
+
def create_tiled_grid(
    image_paths=("00.png", "01.png", "02.png", "03.png", "04.png", "05.png"),
    output_path="tiled_grid.png",
    tile_width=320,
    tile_height=320,
    background_color=(255, 255, 255),
):
    """Tile six images into a grid of 2 columns by 3 rows and save it.

    Images are placed in row-major order (index 0 top-left, index 1
    top-right, ...). Each one is resized to ``tile_width`` x ``tile_height``;
    RGBA images are composited over ``background_color``.

    Args:
        image_paths: Exactly six image paths.
        output_path: Destination of the combined image.
        tile_width: Width of one tile in pixels.
        tile_height: Height of one tile in pixels.
        background_color: RGB fill used for the canvas and behind
            transparent tiles.

    Returns:
        None. If ``image_paths`` does not contain exactly six entries an
        error is printed and nothing is written.
    """
    if len(image_paths) != 6:
        print("Error: Exactly 6 image paths are required.")
        return

    columns, rows = 2, 3
    grid_image = Image.new(
        "RGB", (tile_width * columns, tile_height * rows), background_color
    )

    for i, image_path in enumerate(image_paths):
        # resize() returns an independent, fully loaded image, so the source
        # file can be closed right away instead of being leaked.
        with Image.open(image_path) as source:
            tile = source.resize((tile_width, tile_height))

        # Handle transparency by pasting onto a solid background.
        if tile.mode == "RGBA":
            backdrop = Image.new("RGB", (tile_width, tile_height), background_color)
            backdrop.paste(tile, (0, 0), tile)
            tile = backdrop

        x = (i % columns) * tile_width
        y = (i // columns) * tile_height
        grid_image.paste(tile, (x, y))

    grid_image.save(output_path)
    print(f"Tiled grid image saved to: {output_path}")
+
+
+
+# --- Main Execution Block ---
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Render a 3D object into a multi-view and source image format for EditP23.")
    parser.add_argument("--mesh_path", type=str, required=True, help="Path to the input .glb or .fbx file.")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save the output src.png and src_mv.png.")
    parser.add_argument("--camera_dist", type=float, default=1.35, help="Camera distance from the object.")
    parser.add_argument("--azim_offset", type=float, default=0, help="Azimuthal offset for camera views in degrees.")
    args = parser.parse_args()

    RENDERS_SUBDIR = "all_renders"
    BACKGROUND_COLOR = (255, 255, 255)

    output_dir = Path(args.output_dir)
    renders_path = output_dir / RENDERS_SUBDIR

    # Six grid views: azimuths 30, 90, ..., 330 degrees (plus the offset),
    # with the elevation alternating between ELEV_1 and ELEV_2.
    ELEV_1 = 20
    ELEV_2 = -10
    elevs = [ELEV_1, ELEV_2] * 3
    azims = [(30 + 60 * i + args.azim_offset) % 360 for i in range(6)]
    # A seventh view (index 6) at azimuth 0 + offset and elevation ELEV_1 is
    # appended; it is later copied to src.png.
    camera_views = [(azim, elev, args.camera_dist) for azim, elev in zip(azims, elevs)] + [
        ((0 + args.azim_offset) % 360, ELEV_1, args.camera_dist)
    ]

    # Render the object from different views.
    render_object(
        args.mesh_path,
        output_dir=str(renders_path),
        camera_views=camera_views,
        background_color=BACKGROUND_COLOR,
    )

    # --- Create Final Outputs ---
    image_paths_for_grid = [renders_path / f"{i:02d}.png" for i in range(6)]

    create_tiled_grid(
        image_paths=image_paths_for_grid,
        output_path=str(output_dir / "src_mv.png"),
        background_color=BACKGROUND_COLOR,
    )

    shutil.copy(renders_path / "06.png", output_dir / "src.png")

    # src.png and src_mv.png are written to output_dir; renders_path only
    # holds the individual per-view renders.
    print(f"Saved conditioning view and multi-view grid to {output_dir}.")
diff --git a/src/edit_mv.py b/src/edit_mv.py
new file mode 100644
index 0000000000000000000000000000000000000000..0781bfe83acd0fbc692ce59134b40dbffe443723
--- /dev/null
+++ b/src/edit_mv.py
@@ -0,0 +1,177 @@
+import numpy as np
+import torch
+from PIL import Image
+from tqdm import tqdm
+
+from diffusers import DDPMScheduler
+from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import retrieve_timesteps
+from pipeline import Zero123PlusPipeline
+from utils import add_white_bg, load_z123_pipe
+from typing import Optional
+
class VAEProcessor:
    """Encode images to, and decode images from, the pipeline's VAE latents.

    The shift (0.22), latent scale (0.75) and image scale (0.5 / 0.8) use the
    same numbers as ``scale_latents`` / ``scale_image`` in ``pipeline.py``.
    """

    def __init__(self, pipeline: Zero123PlusPipeline):
        self.pipe = pipeline
        self.vae = pipeline.vae
        self.image_processor = pipeline.image_processor

        self.image_scale_factor = 0.5 / 0.8
        self.latent_scale_factor = 0.75
        self.latent_shift_factor = 0.22

    def encode(self, image: Image.Image) -> torch.Tensor:
        """Encode a PIL image into shifted and scaled VAE latents.

        The image is preprocessed, moved to the VAE device as half precision,
        scaled, encoded (taking the mode of the latent distribution),
        multiplied by the VAE scaling factor, then shifted and scaled.
        """
        pixels = self.image_processor.preprocess(image).to(self.vae.device).half()
        with torch.autocast("cuda"), torch.inference_mode():
            pixels *= self.image_scale_factor
            raw_latents = self.vae.encode(pixels).latent_dist.mode()
            raw_latents *= self.vae.config.scaling_factor
            return (raw_latents - self.latent_shift_factor) * self.latent_scale_factor

    def decode(self, latents: torch.Tensor) -> Image.Image:
        """Invert ``encode`` and return the image processor's post-processed output.

        NOTE(review): ``run_editp23`` indexes the result with ``[0]``, so the
        post-processed value appears to be a sequence of images rather than a
        single image — confirm against the image processor.
        """
        with torch.autocast("cuda"), torch.inference_mode():
            raw_latents = latents / self.latent_scale_factor + self.latent_shift_factor
            pixels = self.vae.decode(
                raw_latents / self.vae.config.scaling_factor, return_dict=False
            )[0]
            pixels /= self.image_scale_factor
            return self.image_processor.postprocess(pixels)
+
+
class EditAwareDenoiser:
    """Propagates a 2D edit into multi-view latents step by step.

    At every processed timestep the source latents and the current edited
    latents are noised with the same noise, the UNet prediction is evaluated
    under the source and the target condition images, and the edited latents
    move along the difference of the two guided predictions.
    """

    def __init__(self, pipe: Zero123PlusPipeline, scheduler: DDPMScheduler, T_steps: int, src_gs: float, tar_gs: float, n_max: int):
        """Store the pipeline, scheduler and editing hyper-parameters.

        Args:
            pipe: Pipeline whose UNet and condition encoder are used.
            scheduler: Scheduler used for the timesteps and for noising latents.
            T_steps: Number of timesteps requested from the scheduler.
            src_gs: Guidance scale applied to the source-condition prediction.
            tar_gs: Guidance scale applied to the target-condition prediction.
            n_max: Number of trailing timesteps that are actually processed.
        """
        self.pipe = pipe
        self.scheduler = scheduler
        self.T_steps = T_steps
        self.n_max = n_max
        self.src_guidance_scale = src_gs
        self.tar_guidance_scale = tar_gs

    @staticmethod
    def _mix_cfg(cond: torch.Tensor, uncond: torch.Tensor, cfg: float) -> torch.Tensor:
        """Return ``uncond + cfg * (cond - uncond)``."""
        return uncond + cfg * (cond - uncond)

    def _get_differential_edit_direction(self, t: torch.Tensor, zt_src: torch.Tensor, zt_tar: torch.Tensor) -> torch.Tensor:
        """Return the guided target prediction minus the guided source prediction.

        Both condition latents are noised with the same freshly drawn noise.
        The source branch is evaluated before the target branch.
        """
        shared_noise = torch.randn_like(self.src_cond_lat)
        cond_sched = self.pipe.scheduler

        def guided_prediction(cond_lat, cond_img, noisy_latent, scale):
            noised_cond = cond_sched.scale_model_input(
                cond_sched.add_noise(cond_lat, shared_noise, t), t
            )
            v_uncond, v_cond = self._calc_v_zero(cond_img, noisy_latent, t, noised_cond)
            return self._mix_cfg(v_cond, v_uncond, scale)

        v_src = guided_prediction(
            self.src_cond_lat, self.src_cond_img, zt_src, self.src_guidance_scale
        )
        v_tar = guided_prediction(
            self.tar_cond_lat, self.tar_cond_img, zt_tar, self.tar_guidance_scale
        )
        return v_tar - v_src

    def _propagate_for_timestep(self, zt_edit: torch.Tensor, t: torch.Tensor, dt: torch.Tensor) -> torch.Tensor:
        """Advance the edited latents by one step of size ``dt``.

        The update is accumulated in float32 and cast back to the dtype of
        the predicted direction.
        """
        step_noise = torch.randn_like(self.x_src)
        sched = self.scheduler
        noisy_src = sched.scale_model_input(sched.add_noise(self.x_src, step_noise, t), t)
        noisy_edit = sched.scale_model_input(sched.add_noise(zt_edit, step_noise, t), t)

        direction = self._get_differential_edit_direction(t, noisy_src, noisy_edit)

        updated = zt_edit.to(torch.float32) + dt * direction
        return updated.to(direction.dtype)

    def _calc_v_zero(self, condition_image: Image.Image, noisy_latent: torch.Tensor, t: torch.Tensor, noised_condition: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Run one pipeline step and return the captured UNet output in two chunks.

        A forward hook on ``pipe.unet`` records the first element of the UNet
        output; the pipeline's own return value is discarded. Callers unpack
        the two chunks as (unconditional, conditional).
        """
        # Fixed scale passed to the pipeline; the real guidance mixing is
        # done afterwards in _mix_cfg.
        DUMMY_GUIDANCE_SCALE = 2
        captured = {}

        def capture_prediction(module, args, output):
            captured['v_pred'] = output[0]

        handle = self.pipe.unet.register_forward_hook(capture_prediction)
        try:
            self.pipe(
                condition_image,
                latents=noisy_latent,
                num_inference_steps=1,
                guidance_scale=DUMMY_GUIDANCE_SCALE,
                timesteps=[t.item()],
                output_type="latent",
                noisy_cond_lat=noised_condition,
            )
        finally:
            # Always detach the hook, even if the pipeline call fails.
            handle.remove()

        return captured['v_pred'].chunk(2)

    @torch.no_grad()
    def denoise(self, x_src: torch.Tensor, src_cond_img: Image.Image, tar_cond_img: Image.Image) -> torch.Tensor:
        """Run the full editing loop and return the edited latents.

        Args:
            x_src: Latents of the original multi-view image.
            src_cond_img: Condition image before the edit.
            tar_cond_img: Condition image after the edit.

        Returns:
            The edited latents after the last ``n_max`` timesteps (or all of
            them if fewer are available) have been processed.
        """
        self.x_src = x_src
        self.src_cond_img = src_cond_img
        self.tar_cond_img = tar_cond_img

        timesteps, _ = retrieve_timesteps(self.scheduler, self.T_steps, self.x_src.device)
        edited = self.x_src.clone()

        self.src_cond_lat = self.pipe.make_condition_lat(self.src_cond_img, guidance_scale=2.0)
        self.tar_cond_lat = self.pipe.make_condition_lat(self.tar_cond_img, guidance_scale=2.0)

        total = len(timesteps)
        first = max(0, total - self.n_max)

        for step in tqdm(range(first, total)):
            t = timesteps[step]
            # Timesteps divided by 1000; the step after the last one is zero.
            frac_now = t / 1000.0
            if step + 1 < total:
                frac_next = timesteps[step + 1] / 1000.0
            else:
                frac_next = torch.zeros_like(frac_now)

            edited = self._propagate_for_timestep(edited, t, frac_next - frac_now)

        return edited
+
+
def run_editp23(
    src_condition_path: str,
    tgt_condition_path: str,
    original_mv: str,
    save_path: str,
    device_number: int = 0,
    T_steps: int = 50,
    n_max: int = 31,
    src_guidance_scale: float = 3.5,
    tar_guidance_scale: float = 5.0,
    seed: int = 18,
    pipeline: Optional[Zero123PlusPipeline] = None,
) -> None:
    """Edit a multi-view image from a (source, edited) condition pair and save it.

    Args:
        src_condition_path: Path of the unedited condition image.
        tgt_condition_path: Path of the edited condition image.
        original_mv: Path of the original multi-view image.
        save_path: Where the edited multi-view image is written.
        device_number: Device index used only when ``pipeline`` is not given.
        T_steps: Number of scheduler timesteps.
        n_max: Number of trailing timesteps processed by the denoiser.
        src_guidance_scale: Guidance scale for the source condition.
        tar_guidance_scale: Guidance scale for the target condition.
        seed: Seed applied to both torch and numpy.
        pipeline: Preloaded pipeline; loaded via ``load_z123_pipe`` if omitted.
    """
    if pipeline is None:
        pipeline = load_z123_pipe(device_number)

    torch.manual_seed(seed)
    np.random.seed(seed)

    vae = VAEProcessor(pipeline)

    # All inputs are flattened onto a white background before use.
    source_condition = add_white_bg(Image.open(src_condition_path))
    target_condition = add_white_bg(Image.open(tgt_condition_path))
    source_views = add_white_bg(Image.open(original_mv))
    source_latents = vae.encode(source_views)

    editor = EditAwareDenoiser(
        pipe=pipeline,
        scheduler=pipeline.scheduler,
        T_steps=T_steps,
        src_gs=src_guidance_scale,
        tar_gs=tar_guidance_scale,
        n_max=n_max,
    )
    edited_latents = editor.denoise(source_latents, source_condition, target_condition)

    edited_views = vae.decode(edited_latents)
    edited_views[0].save(save_path)
    print(f"Successfully saved result to {save_path}")
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..28fba16d6ce346ebc9dfeddfb375ba661a7e1d2f
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,72 @@
+import argparse
+import sys
+from pathlib import Path
+from edit_mv import run_editp23, load_z123_pipe
+
def main(args: argparse.Namespace) -> None:
    """Set up and run the EditP23 process for a single experiment.

    Expects ``src.png``, ``edited.png`` and ``src_mv.png`` inside
    ``args.exp_dir``. The result is written to
    ``<exp_dir>/output/result_tgs_<tar_guidance_scale>_nmax_<n_max>.png``.

    Every missing input file is reported (not just the first one) before the
    process exits with status 1, so all problems can be fixed in one pass.
    """
    exp_dir = Path(args.exp_dir)
    input_files = {
        "src_path": exp_dir / "src.png",
        "edited_path": exp_dir / "edited.png",
        "src_mv_path": exp_dir / "src_mv.png",
    }

    # Validate before the model is loaded.
    missing = [path for path in input_files.values() if not path.is_file()]
    if missing:
        for path in missing:
            print(f"Error: Input file not found at {path}")
        sys.exit(1)

    output_dir = exp_dir / "output"
    output_dir.mkdir(exist_ok=True)
    save_path = output_dir / f"result_tgs_{args.tar_guidance_scale}_nmax_{args.n_max}.png"

    print(f"Running edit for experiment: {args.exp_dir}")
    print(f"Saving to: {save_path}")

    pipeline = load_z123_pipe(args.device_number)

    run_editp23(
        src_condition_path=str(input_files["src_path"]),
        tgt_condition_path=str(input_files["edited_path"]),
        original_mv=str(input_files["src_mv_path"]),
        save_path=str(save_path),
        device_number=args.device_number,
        T_steps=args.T_steps,
        n_max=args.n_max,
        src_guidance_scale=args.src_guidance_scale,
        tar_guidance_scale=args.tar_guidance_scale,
        seed=args.seed,
        pipeline=pipeline,
    )
+
if __name__ == "__main__":
    # RawTextHelpFormatter keeps the line breaks of the preset table below.
    parser = argparse.ArgumentParser(
        description="""Run EditP23 for 3D object editing.
Paper presets for (tar_guidance_scale, n_max):
- Mild: (5, 31)
- Medium: (6, 41), (12, 42)
- Hard: (21, 39)""",
        formatter_class=argparse.RawTextHelpFormatter
    )

    # Experiment location and device.
    parser.add_argument(
        "--exp_dir",
        type=str,
        required=True,
        help="Path to the experiment directory. Expects src.png, edited.png, and src_mv.png in this directory.",
    )
    parser.add_argument(
        "--device_number", type=int, default=0, help="GPU device number to use."
    )

    # Sampling controls.
    parser.add_argument(
        "--seed", type=int, default=18, help="Random seed for reproducibility."
    )
    parser.add_argument(
        "--T_steps", type=int, default=50, help="Total number of denoising steps."
    )
    parser.add_argument(
        "--n_max",
        type=int,
        default=31,
        help="Number of scheduler steps for edit-aware guidance. Increase up to T_steps for more significant edits.",
    )

    # Guidance strengths.
    parser.add_argument(
        "--src_guidance_scale",
        type=float,
        default=3.5,
        help="CFG scale for the source condition. Can typically remain constant.",
    )
    parser.add_argument(
        "--tar_guidance_scale",
        type=float,
        default=5.0,
        help="CFG scale for the target condition. Increase for more significant edits.",
    )

    args = parser.parse_args()
    main(args)
\ No newline at end of file
diff --git a/src/pipeline.py b/src/pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d223713fae0ab97e62e489374137b6f9a686b4e
--- /dev/null
+++ b/src/pipeline.py
@@ -0,0 +1,512 @@
+import os
+from typing import Any, Dict, Optional
+from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.schedulers import KarrasDiffusionSchedulers
+
+import numpy
+import torch
+import torch.nn as nn
+import transformers
+from collections import OrderedDict
+from PIL import Image
+from torchvision import transforms
+from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
+
+import diffusers
+from diffusers import (
+ AutoencoderKL,
+ DDPMScheduler,
+ DiffusionPipeline,
+ EulerAncestralDiscreteScheduler,
+ UNet2DConditionModel,
+ ImagePipelineOutput,
+)
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.models.attention_processor import (
+ Attention,
+ AttnProcessor,
+ XFormersAttnProcessor,
+ AttnProcessor2_0,
+)
+from diffusers.utils.import_utils import is_xformers_available
+
+
def to_rgb_image(maybe_rgba: Image.Image):
    """Return an RGB version of the image.

    RGB images are returned unchanged. RGBA images are composited over a
    uniform gray (127, 127, 127) canvas using their alpha channel as mask.
    The canvas is created directly with ``Image.new`` rather than through a
    NumPy array; the resulting pixels are the same.

    Raises:
        ValueError: If the image mode is neither "RGB" nor "RGBA".
    """
    if maybe_rgba.mode == "RGB":
        return maybe_rgba
    if maybe_rgba.mode == "RGBA":
        canvas = Image.new("RGB", maybe_rgba.size, (127, 127, 127))
        canvas.paste(maybe_rgba, mask=maybe_rgba.getchannel("A"))
        return canvas
    raise ValueError("Unsupported image type.", maybe_rgba.mode)
+
+
class ReferenceOnlyAttnProc(torch.nn.Module):
    """Attention processor wrapper that stores or injects reference states.

    When ``enabled``, the processor either writes its encoder hidden states
    into ``ref_dict`` (mode ``"w"``) or concatenates previously stored states
    to them along dim 1 (modes ``"r"`` and ``"m"``) before delegating to
    ``chained_proc``. When not enabled it only delegates.
    """

    def __init__(self, chained_proc, enabled=False, name=None) -> None:
        super().__init__()
        self.enabled = enabled
        self.chained_proc = chained_proc
        # Key under which this processor stores/reads its states in ref_dict.
        self.name = name

    def __call__(
        self,
        attn: Attention,
        hidden_states,
        encoder_hidden_states=None,
        attention_mask=None,
        mode="w",
        ref_dict: dict = None,
        is_cfg_guidance=False,
    ) -> Any:
        """Run attention, optionally writing or reading reference states.

        Args:
            attn: Attention module passed through to the chained processor.
            hidden_states: Query-side states.
            encoder_hidden_states: Key/value-side states; defaults to
                ``hidden_states``.
            attention_mask: Passed through unchanged.
            mode: ``"w"`` stores states in ``ref_dict``; ``"r"`` pops the
                stored states and appends them; ``"m"`` appends them without
                removing them.
            ref_dict: Storage shared between the write and the read pass.
            is_cfg_guidance: When true (and enabled), the first batch entry
                is processed on its own without reference states and its
                result is concatenated back in front.

        Raises:
            ValueError: If the processor is enabled and ``mode`` is unknown.
        """
        if encoder_hidden_states is None:
            encoder_hidden_states = hidden_states

        split_first = self.enabled and is_cfg_guidance
        if split_first:
            res0 = self.chained_proc(
                attn, hidden_states[:1], encoder_hidden_states[:1], attention_mask
            )
            hidden_states = hidden_states[1:]
            encoder_hidden_states = encoder_hidden_states[1:]

        if self.enabled:
            if mode == "w":
                ref_dict[self.name] = encoder_hidden_states
            elif mode == "r":
                encoder_hidden_states = torch.cat(
                    [encoder_hidden_states, ref_dict.pop(self.name)], dim=1
                )
            elif mode == "m":
                encoder_hidden_states = torch.cat(
                    [encoder_hidden_states, ref_dict[self.name]], dim=1
                )
            else:
                # An explicit raise survives `python -O`, unlike `assert False`.
                raise ValueError(f"Unknown reference attention mode: {mode!r}")

        res = self.chained_proc(
            attn, hidden_states, encoder_hidden_states, attention_mask
        )
        if split_first:
            res = torch.cat([res0, res])
        return res
+
+
class RefOnlyNoisedUNet(torch.nn.Module):
    """UNet wrapper that runs a reference pass on the noised condition latents.

    Every attention processor of the wrapped UNet is replaced by a
    ``ReferenceOnlyAttnProc``; only processors whose name ends in
    ``"attn1.processor"`` are enabled. ``forward`` first runs the UNet on the
    noised condition latents in write mode, then runs it on the actual sample
    in read mode.
    """

    def __init__(
        self,
        unet: UNet2DConditionModel,
        train_sched: DDPMScheduler,
        val_sched: EulerAncestralDiscreteScheduler,
    ) -> None:
        super().__init__()
        self.unet = unet
        self.train_sched = train_sched
        self.val_sched = val_sched

        # Feature-detect the fused attention kernel instead of comparing
        # torch.__version__ as a string, which orders "10.0" before "2.0".
        has_sdpa = hasattr(torch.nn.functional, "scaled_dot_product_attention")

        unet_lora_attn_procs = dict()
        for name, _ in unet.attn_processors.items():
            if has_sdpa:
                default_attn_proc = AttnProcessor2_0()
            elif is_xformers_available():
                default_attn_proc = XFormersAttnProcessor()
            else:
                default_attn_proc = AttnProcessor()
            unet_lora_attn_procs[name] = ReferenceOnlyAttnProc(
                default_attn_proc, enabled=name.endswith("attn1.processor"), name=name
            )
        unet.set_attn_processor(unet_lora_attn_procs)

    def __getattr__(self, name: str):
        # Fall back to the wrapped UNet for attributes this wrapper lacks.
        try:
            return super().__getattr__(name)
        except AttributeError:
            return getattr(self.unet, name)

    def forward_cond(
        self,
        noisy_cond_lat,
        timestep,
        encoder_hidden_states,
        class_labels,
        ref_dict,
        is_cfg_guidance,
        **kwargs,
    ):
        """Run the UNet on the noised condition latents in write mode.

        The output is discarded; the call only fills ``ref_dict``. Under CFG
        guidance the first batch entry of the embeddings and class labels is
        dropped first.
        """
        if is_cfg_guidance:
            encoder_hidden_states = encoder_hidden_states[1:]
            class_labels = class_labels[1:]
        self.unet(
            noisy_cond_lat,
            timestep,
            encoder_hidden_states=encoder_hidden_states,
            class_labels=class_labels,
            cross_attention_kwargs=dict(mode="w", ref_dict=ref_dict),
            **kwargs,
        )

    def forward(
        self,
        sample,
        timestep,
        encoder_hidden_states,
        class_labels=None,
        *args,
        cross_attention_kwargs,
        down_block_res_samples=None,
        mid_block_res_sample=None,
        **kwargs,
    ):
        """Run the reference pass, then the main UNet pass.

        ``cross_attention_kwargs`` must contain ``"cond_lat"``. It may also
        contain ``"noisy_cond_lat"`` (already-noised condition latents, used
        instead of noising ``cond_lat`` here) and ``"is_cfg_guidance"``.
        """
        cond_lat = cross_attention_kwargs["cond_lat"]
        noisy_cond_lat = cross_attention_kwargs.get("noisy_cond_lat", None)
        is_cfg_guidance = cross_attention_kwargs.get("is_cfg_guidance", False)
        # Drawn even when noisy_cond_lat is supplied; kept so the random
        # number stream stays the same as before.
        noise = torch.randn_like(cond_lat)
        if noisy_cond_lat is None:
            if self.training:
                noisy_cond_lat = self.train_sched.add_noise(cond_lat, noise, timestep)
                noisy_cond_lat = self.train_sched.scale_model_input(
                    noisy_cond_lat, timestep
                )
            else:
                noisy_cond_lat = self.val_sched.add_noise(
                    cond_lat, noise, timestep.reshape(-1)
                )
                noisy_cond_lat = self.val_sched.scale_model_input(
                    noisy_cond_lat, timestep.reshape(-1)
                )
        ref_dict = {}
        self.forward_cond(
            noisy_cond_lat,
            timestep,
            encoder_hidden_states,
            class_labels,
            ref_dict,
            is_cfg_guidance,
            **kwargs,
        )
        weight_dtype = self.unet.dtype
        return self.unet(
            sample,
            timestep,
            encoder_hidden_states,
            *args,
            class_labels=class_labels,
            cross_attention_kwargs=dict(
                mode="r", ref_dict=ref_dict, is_cfg_guidance=is_cfg_guidance
            ),
            down_block_additional_residuals=(
                [sample.to(dtype=weight_dtype) for sample in down_block_res_samples]
                if down_block_res_samples is not None
                else None
            ),
            mid_block_additional_residual=(
                mid_block_res_sample.to(dtype=weight_dtype)
                if mid_block_res_sample is not None
                else None
            ),
            **kwargs,
        )
+
+
def scale_latents(latents):
    """Subtract 0.22 from the latents, then multiply by 0.75."""
    shift, gain = 0.22, 0.75
    return (latents - shift) * gain
+
+
def unscale_latents(latents):
    """Divide the latents by 0.75, then add 0.22."""
    shift, gain = 0.22, 0.75
    return latents / gain + shift
+
+
def scale_image(image):
    """Multiply the image by 0.5, then divide by 0.8."""
    numerator, denominator = 0.5, 0.8
    return image * numerator / denominator
+
+
def unscale_image(image):
    """Divide the image by 0.5, then multiply by 0.8."""
    divisor, multiplier = 0.5, 0.8
    return image / divisor * multiplier
+
+
class DepthControlUNet(torch.nn.Module):
    """Wraps a ``RefOnlyNoisedUNet`` with a ControlNet fed by ``control_depth``.

    The ControlNet residuals are handed to the wrapped UNet as
    ``down_block_res_samples`` / ``mid_block_res_sample``.
    """

    def __init__(
        self,
        unet: RefOnlyNoisedUNet,
        controlnet: Optional[diffusers.ControlNetModel] = None,
        conditioning_scale=1.0,
    ) -> None:
        super().__init__()
        self.unet = unet
        # Without an explicit ControlNet, derive one from the inner UNet.
        self.controlnet = (
            diffusers.ControlNetModel.from_unet(unet.unet)
            if controlnet is None
            else controlnet
        )
        # xformers attention is preferred whenever it is available.
        processor_cls = (
            XFormersAttnProcessor if is_xformers_available() else AttnProcessor2_0
        )
        self.controlnet.set_attn_processor(processor_cls())
        self.conditioning_scale = conditioning_scale

    def __getattr__(self, name: str):
        # Fall back to the wrapped UNet for attributes this wrapper lacks.
        try:
            return super().__getattr__(name)
        except AttributeError:
            return getattr(self.unet, name)

    def forward(
        self,
        sample,
        timestep,
        encoder_hidden_states,
        class_labels=None,
        *args,
        cross_attention_kwargs: dict,
        **kwargs,
    ):
        """Run the ControlNet, then the wrapped UNet with its residuals.

        ``cross_attention_kwargs`` must contain ``"control_depth"``. It is
        removed from a copy, so the caller's dict is left untouched.
        ``class_labels``, ``*args`` and ``**kwargs`` are accepted but not
        forwarded.
        """
        forwarded_kwargs = dict(cross_attention_kwargs)
        depth_condition = forwarded_kwargs.pop("control_depth")
        down_residuals, mid_residual = self.controlnet(
            sample,
            timestep,
            encoder_hidden_states=encoder_hidden_states,
            controlnet_cond=depth_condition,
            conditioning_scale=self.conditioning_scale,
            return_dict=False,
        )
        return self.unet(
            sample,
            timestep,
            encoder_hidden_states=encoder_hidden_states,
            down_block_res_samples=down_residuals,
            mid_block_res_sample=mid_residual,
            cross_attention_kwargs=forwarded_kwargs,
        )
+
+
class ModuleListDict(torch.nn.Module):
    """Holds modules in a ``ModuleList`` ordered by sorted key, looked up by key."""

    def __init__(self, procs: dict) -> None:
        super().__init__()
        ordered_names = sorted(procs)
        self.keys = ordered_names
        self.values = torch.nn.ModuleList([procs[name] for name in ordered_names])

    def __getitem__(self, key):
        # The position in the sorted key list doubles as the ModuleList index.
        position = self.keys.index(key)
        return self.values[position]
+
+
class SuperNet(torch.nn.Module):
    """Stores named modules in a ``ModuleList`` while keeping their names in state dicts.

    The modules are kept in ``self.layers`` in sorted-name order. Two hooks
    translate between the internal ``layers.<index>`` prefixes and the
    original names whenever a state dict is produced or loaded.
    """

    def __init__(self, state_dict: Dict[str, torch.Tensor]):
        super().__init__()
        ordered_names = sorted(state_dict.keys())
        state_dict = OrderedDict((name, state_dict[name]) for name in ordered_names)
        self.layers = torch.nn.ModuleList(state_dict.values())
        self.mapping = dict(enumerate(ordered_names))
        self.rev_mapping = {name: index for index, name in enumerate(ordered_names)}

        # .processor for unet, .self_attn for text encoder
        self.split_keys = [".processor", ".self_attn"]

        # The hooks on state_dict() and load_state_dict() make the naming
        # fit with `unet.attn_processors`.
        def map_to(module, state_dict, *args, **kwargs):
            # layers.<index>.* -> <original name>.*
            renamed = {}
            for key, value in state_dict.items():
                layer_index = int(key.split(".")[1])  # 0 is always "layers"
                external_key = key.replace(
                    f"layers.{layer_index}", module.mapping[layer_index]
                )
                renamed[external_key] = value
            return renamed

        def remap_key(key, state_dict):
            # Name prefix up to and including the first matching split key,
            # or else everything before the first dot.
            for marker in self.split_keys:
                if marker in key:
                    return key.split(marker)[0] + marker
            return key.split(".")[0]

        def map_from(module, state_dict, *args, **kwargs):
            # <original name>.* -> layers.<index>.*, rewritten in place.
            for key in list(state_dict.keys()):
                name_prefix = remap_key(key, state_dict)
                internal_key = key.replace(
                    name_prefix, f"layers.{module.rev_mapping[name_prefix]}"
                )
                state_dict[internal_key] = state_dict[key]
                del state_dict[key]

        self._register_state_dict_hook(map_to)
        self._register_load_state_dict_pre_hook(map_from, with_module=True)
+
+
class Zero123PlusPipeline(diffusers.StableDiffusionPipeline):
    """Image-conditioned pipeline built on ``StableDiffusionPipeline``.

    The input image conditions generation in two ways: its CLIP image
    embedding, weighted by ``ramping_coefficients``, is added to the prompt
    embeddings, and its VAE latents are passed to the UNet through
    ``cross_attention_kwargs`` as ``cond_lat``.
    """

    tokenizer: transformers.CLIPTokenizer
    text_encoder: transformers.CLIPTextModel
    vision_encoder: transformers.CLIPVisionModelWithProjection

    feature_extractor_clip: transformers.CLIPImageProcessor
    unet: UNet2DConditionModel
    scheduler: diffusers.schedulers.KarrasDiffusionSchedulers

    vae: AutoencoderKL
    ramping: nn.Linear

    feature_extractor_vae: transformers.CLIPImageProcessor

    depth_transforms_multi = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
    )

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        tokenizer: CLIPTokenizer,
        unet: UNet2DConditionModel,
        scheduler: KarrasDiffusionSchedulers,
        vision_encoder: transformers.CLIPVisionModelWithProjection,
        feature_extractor_clip: CLIPImageProcessor,
        feature_extractor_vae: CLIPImageProcessor,
        ramping_coefficients: Optional[list] = None,
        safety_checker=None,
    ):
        """Register the sub-modules; ``safety_checker`` is accepted but always registered as None."""
        DiffusionPipeline.__init__(self)

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            scheduler=scheduler,
            safety_checker=None,
            vision_encoder=vision_encoder,
            feature_extractor_clip=feature_extractor_clip,
            feature_extractor_vae=feature_extractor_vae,
        )
        self.register_to_config(ramping_coefficients=ramping_coefficients)
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

    def prepare(self):
        """Wrap a plain UNet in ``RefOnlyNoisedUNet``; does nothing once it is wrapped."""
        train_sched = DDPMScheduler.from_config(self.scheduler.config)
        if isinstance(self.unet, UNet2DConditionModel):
            self.unet = RefOnlyNoisedUNet(self.unet, train_sched, self.scheduler).eval()

    def add_controlnet(
        self,
        controlnet: Optional[diffusers.ControlNetModel] = None,
        conditioning_scale=1.0,
    ):
        """Wrap the UNet in a ``DepthControlUNet`` and return its ControlNet in a ``SuperNet``."""
        self.prepare()
        self.unet = DepthControlUNet(self.unet, controlnet, conditioning_scale)
        return SuperNet(OrderedDict([("controlnet", self.unet.controlnet)]))

    def encode_condition_image(self, image: torch.Tensor):
        """Encode an image tensor with the VAE and sample from the latent distribution."""
        image = self.vae.encode(image).latent_dist.sample()
        return image

    def make_condition_lat(
        self,
        local_cond_image,
        num_images_per_prompt: Optional[int] = 1,
        guidance_scale=4.0,
    ):
        """Build the condition latents for an input image.

        Args:
            local_cond_image: RGB or RGBA PIL image.
            num_images_per_prompt: Number of copies of each batch entry.
            guidance_scale: When greater than 1, latents of an all-zero
                image are prepended as the negative condition.

        Returns:
            The condition latents, with the negative latents first when
            ``guidance_scale > 1``.
        """
        local_cond_image = to_rgb_image(local_cond_image)
        local_cond_image_f = self.feature_extractor_vae(
            images=local_cond_image, return_tensors="pt"
        ).pixel_values

        image = local_cond_image_f.to(device=self.vae.device, dtype=self.vae.dtype)
        cond_lat = self.encode_condition_image(image)
        if guidance_scale > 1:
            negative_lat = self.encode_condition_image(torch.zeros_like(image))
            cond_lat = torch.cat([negative_lat, cond_lat])
        if num_images_per_prompt > 1:
            # Tile along dim 1, then fold the copies into the batch dimension
            # so that each entry's copies end up adjacent.
            bs_embed, *lat_shape = cond_lat.shape
            assert len(lat_shape) == 3
            cond_lat = cond_lat.repeat(1, num_images_per_prompt, 1, 1)
            cond_lat = cond_lat.view(bs_embed * num_images_per_prompt, *lat_shape)
        return cond_lat

    @torch.no_grad()
    def __call__(
        self,
        image: Image.Image = None,
        prompt="",
        *args,
        num_images_per_prompt: Optional[int] = 1,
        guidance_scale=4.0,
        depth_image: Image.Image = None,
        output_type: Optional[str] = "pil",
        width=640,
        height=960,
        num_inference_steps=28,
        return_dict=True,
        noisy_cond_lat=None,
        **kwargs,
    ):
        """Generate images (or latents) conditioned on ``image``.

        Args:
            image: Conditioning PIL image (RGB or RGBA). Required.
            prompt: Text prompt encoded alongside the image embedding.
            num_images_per_prompt: Number of outputs per prompt.
            guidance_scale: Guidance scale forwarded to the base pipeline and
                to ``make_condition_lat``.
            depth_image: Optional depth image; only used when the UNet has a
                ``controlnet`` attribute.
            output_type: ``"latent"`` returns the unscaled latents; anything
                else decodes them with the VAE first.
            width: Output width passed to the base pipeline.
            height: Output height passed to the base pipeline.
            num_inference_steps: Number of denoising steps.
            return_dict: When false, a one-element tuple is returned.
            noisy_cond_lat: Optional pre-noised condition latents forwarded
                to the UNet via ``cross_attention_kwargs``.

        Raises:
            ValueError: If ``image`` is None or has an unsupported mode.
            TypeError: If ``image`` is a ``torch.Tensor``.
        """
        self.prepare()
        if image is None:
            raise ValueError(
                "Inputting embeddings not supported for this pipeline. Please pass an image."
            )
        # Explicit check instead of `assert`, which disappears under `python -O`.
        if isinstance(image, torch.Tensor):
            raise TypeError(
                "Zero123PlusPipeline expects a PIL image, not a torch.Tensor."
            )

        # CLIP branch: global image embedding.
        image_2 = self.feature_extractor_clip(
            images=to_rgb_image(image), return_tensors="pt"
        ).pixel_values

        if depth_image is not None and hasattr(self.unet, "controlnet"):
            depth_image = to_rgb_image(depth_image)
            depth_image = self.depth_transforms_multi(depth_image).to(
                device=self.unet.controlnet.device, dtype=self.unet.controlnet.dtype
            )
        image_2 = image_2.to(device=self.vae.device, dtype=self.vae.dtype)

        encoded = self.vision_encoder(image_2, output_hidden_states=False)
        global_embeds = encoded.image_embeds
        global_embeds = global_embeds.unsqueeze(-2)

        # Prefer encode_prompt when the base class provides it and fall back
        # to _encode_prompt otherwise.
        if hasattr(self, "encode_prompt"):
            encoder_hidden_states = self.encode_prompt(prompt, self.device, 1, False)[0]
        else:
            encoder_hidden_states = self._encode_prompt(prompt, self.device, 1, False)
        ramp = global_embeds.new_tensor(self.config.ramping_coefficients).unsqueeze(-1)
        encoder_hidden_states = encoder_hidden_states + global_embeds * ramp

        # VAE branch: make_condition_lat does its own RGB conversion, so the
        # caller's image is passed as-is.
        cond_lat = self.make_condition_lat(image, num_images_per_prompt, guidance_scale)

        cak = dict(cond_lat=cond_lat, noisy_cond_lat=noisy_cond_lat)
        if hasattr(self.unet, "controlnet"):
            cak["control_depth"] = depth_image
        latents: torch.Tensor = (
            super()
            .__call__(
                None,
                *args,
                cross_attention_kwargs=cak,
                guidance_scale=guidance_scale,
                num_images_per_prompt=num_images_per_prompt,
                prompt_embeds=encoder_hidden_states,
                num_inference_steps=num_inference_steps,
                output_type="latent",
                width=width,
                height=height,
                **kwargs,
            )
            .images
        )
        latents = unscale_latents(latents)
        if output_type != "latent":
            image = unscale_image(
                self.vae.decode(
                    latents / self.vae.config.scaling_factor, return_dict=False
                )[0]
            )
        else:
            image = latents

        image = self.image_processor.postprocess(image, output_type=output_type)
        if not return_dict:
            return (image,)

        return ImagePipelineOutput(images=image)
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..103af98cd5b492305d62ab27f5619094be381c8d
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,46 @@
+import torch
+import os
+from diffusers import DDPMScheduler
+from pipeline import Zero123PlusPipeline
+from huggingface_hub import hf_hub_download
+from PIL import Image
+
+
def load_z123_pipe(device_number):
    """Load the Zero123++ pipeline with the replacement UNet weights.

    The pipeline is loaded in float16, its scheduler is replaced by a
    ``DDPMScheduler`` built from the same config, and the UNet weights are
    overwritten from ``ckpts/diffusion_pytorch_model.bin`` when that file
    exists, otherwise from the file of the same name downloaded from the
    ``TencentARC/InstantMesh`` repository.

    Args:
        device_number: CUDA device index; ignored when CUDA is unavailable,
            in which case the CPU is used.

    Returns:
        The pipeline, moved to the selected device.
    """
    device = torch.device(
        f"cuda:{device_number}" if torch.cuda.is_available() else "cpu"
    )

    pipeline = Zero123PlusPipeline.from_pretrained(
        "sudo-ai/zero123plus-v1.2", torch_dtype=torch.float16
    )
    # DDPM supports custom timesteps
    pipeline.scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)

    # Load the custom white-background UNet, preferring a local checkpoint.
    unet_path = "ckpts/diffusion_pytorch_model.bin"
    if os.path.exists(unet_path):
        unet_ckpt_path = unet_path
    else:
        unet_ckpt_path = hf_hub_download(
            repo_id="TencentARC/InstantMesh",
            filename="diffusion_pytorch_model.bin",
            repo_type="model",
        )
    # SECURITY: the checkpoint may have been downloaded. weights_only=True
    # limits unpickling to tensors and plain containers, so a tampered file
    # cannot run arbitrary code while loading.
    state_dict = torch.load(unet_ckpt_path, map_location="cpu", weights_only=True)
    pipeline.unet.load_state_dict(state_dict, strict=True)

    pipeline.to(device)
    return pipeline
+
+
def add_white_bg(image):
    """Composite an image with an alpha channel over a white background.

    Images in mode "RGBA" or "LA" are pasted onto a new white RGB image of
    the same size, using their last band as the mask. Any other image is
    returned unchanged.
    """
    if image.mode not in ("RGBA", "LA"):
        # No alpha band: hand the original object back untouched.
        return image

    alpha_band = image.split()[-1]
    canvas = Image.new("RGB", image.size, (255, 255, 255))
    canvas.paste(image, mask=alpha_band)
    return canvas