Spaces:

stabilityai
/

stable-point-aware-3d

Running on L4

App Files Files Community

mboss Aaryaman Vasishta committed on Dec 31, 2024

Commit

64fccd8

1 Parent(s): c2f384d

Update demo with latest changes

Browse files

Co-authored-by: Aaryaman Vasishta <aaryaman.vasishta@stability.ai>

Files changed (5) hide show

gradio_app.py +98 -13
requirements.txt +1 -0
run.py +2 -2
spar3d/models/global_estimator/reni_estimator.py +7 -3
spar3d/system.py +14 -10

gradio_app.py CHANGED Viewed

@@ -2,10 +2,12 @@ import os
 import random
 import tempfile
 import time
 from contextlib import nullcontext
 from functools import lru_cache
 from typing import Any
 import gradio as gr
 import numpy as np
 import torch
@@ -62,6 +64,23 @@ example_files = [
 ]
 def forward_model(
     batch,
     system,
@@ -105,11 +124,16 @@ def forward_model(
     # forward for the final mesh
     trimesh_mesh, _glob_dict = model.generate_mesh(
-        batch, texture_resolution, remesh=remesh_option, vertex_count=vertex_count
     )
     trimesh_mesh = trimesh_mesh[0]
-    return trimesh_mesh, pc_rgb_trimesh
 def run_model(
@@ -169,7 +193,7 @@ def run_model(
                         dim=1,
                     )
-            trimesh_mesh, trimesh_pc = forward_model(
                 model_batch,
                 model,
                 guidance_scale=guidance_scale,
@@ -191,9 +215,13 @@ def run_model(
     trimesh_pc.export(tmp_file_pc)
     generated_files.append(tmp_file_pc)
     print("Generation took:", time.time() - start, "s")
-    return tmp_file, tmp_file_pc, trimesh_pc
 def create_batch(input_image: Image) -> dict[str, Any]:
@@ -272,7 +300,7 @@ def process_model_run(
         f"Final vertex count: {final_vertex_count} with type {vertex_count_type} and vertex count {vertex_count}"
     )
-    glb_file, pc_file, pc_plot = run_model(
         background_state,
         guidance_scale,
         random_seed,
@@ -295,7 +323,7 @@ def process_model_run(
             ]
         )
-    return glb_file, pc_file, point_list
 def regenerate_run(
@@ -308,7 +336,7 @@ def regenerate_run(
     vertex_count,
     texture_resolution,
 ):
-    glb_file, pc_file, point_list = process_model_run(
         background_state,
         guidance_scale,
         random_seed,
@@ -318,6 +346,8 @@ def regenerate_run(
         vertex_count,
         texture_resolution,
     )
     return (
         gr.update(),  # run_btn
         gr.update(),  # img_proc_state
@@ -325,10 +355,12 @@ def regenerate_run(
         gr.update(),  # preview_removal
         gr.update(value=glb_file, visible=True),  # output_3d
         gr.update(visible=True),  # hdr_row
         gr.update(visible=True),  # point_cloud_row
         gr.update(value=point_list),  # point_cloud_editor
         gr.update(value=pc_file),  # pc_download
         gr.update(visible=False),  # regenerate_btn
     )
@@ -362,7 +394,7 @@ def run_button(
         else:
             pc_cond = None
-        glb_file, pc_file, pc_list = process_model_run(
             background_state,
             guidance_scale,
             random_seed,
@@ -373,6 +405,8 @@ def run_button(
             texture_resolution,
         )
         if torch.cuda.is_available():
             print("Peak Memory:", torch.cuda.max_memory_allocated() / 1024 / 1024, "MB")
         elif torch.backends.mps.is_available():
@@ -387,10 +421,12 @@ def run_button(
             gr.update(),  # preview_removal
             gr.update(value=glb_file, visible=True),  # output_3d
             gr.update(visible=True),  # hdr_row
             gr.update(visible=True),  # point_cloud_row
             gr.update(value=pc_list),  # point_cloud_editor
             gr.update(value=pc_file),  # pc_download
             gr.update(visible=False),  # regenerate_btn
         )
     elif run_btn == "Remove Background":
@@ -410,10 +446,12 @@ def run_button(
             gr.update(value=show_mask_img(fr_res), visible=True),  # preview_removal
             gr.update(value=None, visible=False),  # output_3d
             gr.update(visible=False),  # hdr_row
             gr.update(visible=False),  # point_cloud_row
             gr.update(value=None),  # point_cloud_editor
             gr.update(value=None),  # pc_download
             gr.update(visible=False),  # regenerate_btn
         )
@@ -425,11 +463,13 @@ def requires_bg_remove(image, fr, no_crop):
             None,  # background_remove_state
             gr.update(value=None, visible=False),  # preview_removal
             gr.update(value=None, visible=False),  # output_3d
-            gr.update(visible=False),  # hdr_row
             gr.update(visible=False),  # point_cloud_row
             gr.update(value=None),  # point_cloud_editor
             gr.update(value=None),  # pc_download
             gr.update(visible=False),  # regenerate_btn
         )
     alpha_channel = np.array(image.getchannel("A"))
     min_alpha = alpha_channel.min()
@@ -446,10 +486,12 @@ def requires_bg_remove(image, fr, no_crop):
             gr.update(value=show_mask_img(fr_res), visible=True),  # preview_removal
             gr.update(value=None, visible=False),  # output_3d
             gr.update(visible=False),  # hdr_row
             gr.update(visible=False),  # point_cloud_row
             gr.update(value=None),  # point_cloud_editor
             gr.update(value=None),  # pc_download
             gr.update(visible=False),  # regenerate_btn
         )
     return (
         gr.update(value="Remove Background", visible=True),  # run_Btn
@@ -458,10 +500,12 @@ def requires_bg_remove(image, fr, no_crop):
         gr.update(value=None, visible=False),  # preview_removal
         gr.update(value=None, visible=False),  # output_3d
         gr.update(visible=False),  # hdr_row
         gr.update(visible=False),  # point_cloud_row
         gr.update(value=None),  # point_cloud_editor
         gr.update(value=None),  # pc_download
         gr.update(visible=False),  # regenerate_btn
     )
@@ -487,6 +531,7 @@ def update_resolution_controls(remesh_choice, vertex_count_type):
 with gr.Blocks() as demo:
     img_proc_state = gr.State()
     background_remove_state = gr.State()
     gr.Markdown(
         """
     # SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images
@@ -699,12 +744,46 @@ with gr.Blocks() as demo:
                         inputs=hdr_illumination_file,
                     )
                     hdr_illumination_file.change(
-                        lambda x: gr.update(env_map=x.name if x is not None else None),
-                        inputs=hdr_illumination_file,
-                        outputs=[output_3d],
                     )
     examples = gr.Examples(
         examples=example_files, inputs=input_img, examples_per_page=11
     )
@@ -719,10 +798,12 @@ with gr.Blocks() as demo:
             preview_removal,
             output_3d,
             hdr_row,
             point_cloud_row,
             point_cloud_editor,
             pc_download,
             regenerate_btn,
         ],
     )
@@ -751,10 +832,12 @@ with gr.Blocks() as demo:
             preview_removal,
             output_3d,
             hdr_row,
             point_cloud_row,
             point_cloud_editor,
             pc_download,
             regenerate_btn,
         ],
     )
@@ -782,11 +865,13 @@ with gr.Blocks() as demo:
             preview_removal,
             output_3d,
             hdr_row,
             point_cloud_row,
             point_cloud_editor,
             pc_download,
             regenerate_btn,
         ],
     )
-demo.queue().launch()

 import random
 import tempfile
 import time
+import zipfile
 from contextlib import nullcontext
 from functools import lru_cache
 from typing import Any
+import cv2
 import gradio as gr
 import numpy as np
 import torch
 ]
+def create_zip_file(glb_file, pc_file, illumination_file):  # bundle the three output files into one ZIP; returns the ZIP path, or None
+    if not all([glb_file, pc_file, illumination_file]):  # any missing/empty path means there is nothing complete to bundle
+        return None
+    # Build the archive inside a fresh temporary directory (mkdtemp) under a fixed file name
+    temp_dir = tempfile.mkdtemp()
+    zip_path = os.path.join(temp_dir, "spar3d_output.zip")
+    with zipfile.ZipFile(zip_path, "w") as zipf:
+        zipf.write(glb_file, "mesh.glb")  # second argument is the name the entry gets inside the archive
+        zipf.write(pc_file, "points.ply")
+        zipf.write(illumination_file, "illumination.hdr")
+    generated_files.append(zip_path)  # generated_files is defined outside this hunk; presumably tracks temp outputs for later cleanup — TODO confirm
+    return zip_path
 def forward_model(
     batch,
     system,
     # forward for the final mesh
     trimesh_mesh, _glob_dict = model.generate_mesh(
+        batch,
+        texture_resolution,
+        remesh=remesh_option,
+        vertex_count=vertex_count,
+        estimate_illumination=True,
     )
     trimesh_mesh = trimesh_mesh[0]
+    illumination = _glob_dict["illumination"]
+    return trimesh_mesh, pc_rgb_trimesh, illumination.cpu().detach().numpy()[0]
 def run_model(
                         dim=1,
                     )
+            trimesh_mesh, trimesh_pc, illumination_map = forward_model(
                 model_batch,
                 model,
                 guidance_scale=guidance_scale,
     trimesh_pc.export(tmp_file_pc)
     generated_files.append(tmp_file_pc)
+    tmp_file_illumination = os.path.join(temp_dir, "illumination.hdr")
+    cv2.imwrite(tmp_file_illumination, illumination_map)
+    generated_files.append(tmp_file_illumination)
     print("Generation took:", time.time() - start, "s")
+    return tmp_file, tmp_file_pc, tmp_file_illumination, trimesh_pc
 def create_batch(input_image: Image) -> dict[str, Any]:
         f"Final vertex count: {final_vertex_count} with type {vertex_count_type} and vertex count {vertex_count}"
     )
+    glb_file, pc_file, illumination_file, pc_plot = run_model(
         background_state,
         guidance_scale,
         random_seed,
             ]
         )
+    return glb_file, pc_file, illumination_file, point_list
 def regenerate_run(
     vertex_count,
     texture_resolution,
 ):
+    glb_file, pc_file, illumination_file, point_list = process_model_run(
         background_state,
         guidance_scale,
         random_seed,
         vertex_count,
         texture_resolution,
     )
+    zip_file = create_zip_file(glb_file, pc_file, illumination_file)
     return (
         gr.update(),  # run_btn
         gr.update(),  # img_proc_state
         gr.update(),  # preview_removal
         gr.update(value=glb_file, visible=True),  # output_3d
         gr.update(visible=True),  # hdr_row
+        illumination_file,  # hdr_file
         gr.update(visible=True),  # point_cloud_row
         gr.update(value=point_list),  # point_cloud_editor
         gr.update(value=pc_file),  # pc_download
         gr.update(visible=False),  # regenerate_btn
+        gr.update(value=zip_file, visible=True),  # download_all_btn
     )
         else:
             pc_cond = None
+        glb_file, pc_file, illumination_file, pc_list = process_model_run(
             background_state,
             guidance_scale,
             random_seed,
             texture_resolution,
         )
+        zip_file = create_zip_file(glb_file, pc_file, illumination_file)
         if torch.cuda.is_available():
             print("Peak Memory:", torch.cuda.max_memory_allocated() / 1024 / 1024, "MB")
         elif torch.backends.mps.is_available():
             gr.update(),  # preview_removal
             gr.update(value=glb_file, visible=True),  # output_3d
             gr.update(visible=True),  # hdr_row
+            illumination_file,  # hdr_file
             gr.update(visible=True),  # point_cloud_row
             gr.update(value=pc_list),  # point_cloud_editor
             gr.update(value=pc_file),  # pc_download
             gr.update(visible=False),  # regenerate_btn
+            gr.update(value=zip_file, visible=True),  # download_all_btn
         )
     elif run_btn == "Remove Background":
             gr.update(value=show_mask_img(fr_res), visible=True),  # preview_removal
             gr.update(value=None, visible=False),  # output_3d
             gr.update(visible=False),  # hdr_row
+            None,  # hdr_file
             gr.update(visible=False),  # point_cloud_row
             gr.update(value=None),  # point_cloud_editor
             gr.update(value=None),  # pc_download
             gr.update(visible=False),  # regenerate_btn
+            gr.update(value=None, visible=False),  # download_all_btn
         )
             None,  # background_remove_state
             gr.update(value=None, visible=False),  # preview_removal
             gr.update(value=None, visible=False),  # output_3d
+            gr.update(value=None, visible=False),  # hdr_row
+            None,  # hdr_file
             gr.update(visible=False),  # point_cloud_row
             gr.update(value=None),  # point_cloud_editor
             gr.update(value=None),  # pc_download
             gr.update(visible=False),  # regenerate_btn
+            gr.update(value=None, visible=False),  # download_all_btn
         )
     alpha_channel = np.array(image.getchannel("A"))
     min_alpha = alpha_channel.min()
             gr.update(value=show_mask_img(fr_res), visible=True),  # preview_removal
             gr.update(value=None, visible=False),  # output_3d
             gr.update(visible=False),  # hdr_row
+            None,  # hdr_file
             gr.update(visible=False),  # point_cloud_row
             gr.update(value=None),  # point_cloud_editor
             gr.update(value=None),  # pc_download
             gr.update(visible=False),  # regenerate_btn
+            gr.update(value=None, visible=False),  # download_all_btn
         )
     return (
         gr.update(value="Remove Background", visible=True),  # run_Btn
         gr.update(value=None, visible=False),  # preview_removal
         gr.update(value=None, visible=False),  # output_3d
         gr.update(visible=False),  # hdr_row
+        None,  # hdr_file
         gr.update(visible=False),  # point_cloud_row
         gr.update(value=None),  # point_cloud_editor
         gr.update(value=None),  # pc_download
         gr.update(visible=False),  # regenerate_btn
+        gr.update(value=None, visible=False),  # download_all_btn
     )
 with gr.Blocks() as demo:
     img_proc_state = gr.State()
     background_remove_state = gr.State()
+    hdr_illumination_file_state = gr.State()
     gr.Markdown(
         """
     # SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images
                         inputs=hdr_illumination_file,
                     )
+                    def update_hdr_illumination_file(state, cur_update):  # returns (file-widget update, output_3d update), matching the handler's outputs list
+                        # If the current value of hdr_illumination_file is the same as cur_update, then we don't need to update
+                        if (
+                            hdr_illumination_file.value is not None  # NOTE(review): a component's .value may be its build-time default rather than the live session value — confirm this guard can trigger
+                            and hdr_illumination_file.value == cur_update
+                        ):
+                            return (
+                                gr.update(),  # empty update: leave the file widget untouched
+                                gr.update(),  # empty update: leave the 3D viewer untouched
+                            )
+                        update_value = cur_update if cur_update is not None else state  # prefer the widget's new value; fall back to the value held in state
+                        if update_value is not None:
+                            return (
+                                gr.update(value=update_value),
+                                gr.update(
+                                    env_map=(
+                                        update_value.name
+                                        if isinstance(update_value, gr.File)  # NOTE(review): handlers usually receive paths/temp-file wrappers, not gr.File components — confirm this branch is reachable
+                                        else update_value
+                                    )
+                                ),
+                            )
+                        return (gr.update(value=None), gr.update(env_map=None))  # neither widget nor state has a file: clear the widget and the environment map
                     hdr_illumination_file.change(
+                        update_hdr_illumination_file,
+                        inputs=[hdr_illumination_file_state, hdr_illumination_file],
+                        outputs=[hdr_illumination_file, output_3d],
                     )
+            download_all_btn = gr.File(
+                label="Download All Files (ZIP)", file_count="single", visible=False
+            )
+    hdr_illumination_file_state.change(
+        fn=lambda x: gr.update(value=x),
+        inputs=hdr_illumination_file_state,
+        outputs=hdr_illumination_file,
+    )
     examples = gr.Examples(
         examples=example_files, inputs=input_img, examples_per_page=11
     )
             preview_removal,
             output_3d,
             hdr_row,
+            hdr_illumination_file_state,
             point_cloud_row,
             point_cloud_editor,
             pc_download,
             regenerate_btn,
+            download_all_btn,
         ],
     )
             preview_removal,
             output_3d,
             hdr_row,
+            hdr_illumination_file_state,
             point_cloud_row,
             point_cloud_editor,
             pc_download,
             regenerate_btn,
+            download_all_btn,
         ],
     )
             preview_removal,
             output_3d,
             hdr_row,
+            hdr_illumination_file_state,
             point_cloud_row,
             point_cloud_editor,
             pc_download,
             regenerate_btn,
+            download_all_btn,
         ],
     )
+demo.queue().launch(share=False)

requirements.txt CHANGED Viewed

@@ -16,6 +16,7 @@ transparent-background==1.3.3
 gradio==4.43.0
 gradio-litmodel3d==0.0.1
 gradio-pointcloudeditor==0.0.9
 gpytoolbox==0.2.0
 # ./texture_baker/
 # ./uv_unwrapper/

 gradio==4.43.0
 gradio-litmodel3d==0.0.1
 gradio-pointcloudeditor==0.0.9
+opencv-python==4.10.0.84
 gpytoolbox==0.2.0
 # ./texture_baker/
 # ./uv_unwrapper/

run.py CHANGED Viewed

@@ -32,9 +32,9 @@ if __name__ == "__main__":
     )
     parser.add_argument(
         "--pretrained-model",
-        default="stabilityai/spar3d",
         type=str,
-        help="Path to the pretrained model. Could be either a huggingface model id is or a local path. Default: 'stabilityai/spar3d'",
     )
     parser.add_argument(
         "--foreground-ratio",

     )
     parser.add_argument(
         "--pretrained-model",
+        default="stabilityai/stable-point-aware-3d",
         type=str,
+        help="Path to the pretrained model. Could be either a huggingface model id is or a local path. Default: 'stabilityai/stable-point-aware-3d'",
     )
     parser.add_argument(
         "--foreground-ratio",

spar3d/models/global_estimator/reni_estimator.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import Any
 import torch
 import torch.nn as nn
@@ -95,6 +95,7 @@ class ReniLatentCodeEstimator(BaseModule):
     def forward(
         self,
         triplane: Float[Tensor, "B 3 F Ht Wt"],
     ) -> dict[str, Any]:
         x = self.layers(
             triplane.reshape(
@@ -104,9 +105,12 @@ class ReniLatentCodeEstimator(BaseModule):
         x = x.mean(dim=[-2, -1])
         latents = self.fc_latents(x).reshape(-1, self.latent_dim, 3)
-        rotations = self.fc_rotations(x)
         scale = self.fc_scale(x)
-        env_map = self.reni_env_map(latents, rotation_6d_to_matrix(rotations), scale)
         return {"illumination": env_map["rgb"]}

 from dataclasses import dataclass, field
+from typing import Any, Optional
 import torch
 import torch.nn as nn
     def forward(
         self,
         triplane: Float[Tensor, "B 3 F Ht Wt"],
+        rotation: Optional[Float[Tensor, "B 3 3"]] = None,
     ) -> dict[str, Any]:
         x = self.layers(
             triplane.reshape(
         x = x.mean(dim=[-2, -1])
         latents = self.fc_latents(x).reshape(-1, self.latent_dim, 3)
+        rotations = rotation_6d_to_matrix(self.fc_rotations(x))
         scale = self.fc_scale(x)
+        if rotation is not None:
+            rotations = rotations @ rotation.to(dtype=rotations.dtype)
+        env_map = self.reni_env_map(latents, rotations, scale)
         return {"illumination": env_map["rgb"]}

spar3d/system.py CHANGED Viewed

@@ -506,6 +506,11 @@ class SPAR3D(BaseModule):
         scene_codes, non_postprocessed_codes = self.get_scene_codes(batch)
         global_dict = {}
         if self.image_estimator is not None:
             global_dict.update(
@@ -514,7 +519,14 @@ class SPAR3D(BaseModule):
                 )
             )
         if self.global_estimator is not None and estimate_illumination:
-            global_dict.update(self.global_estimator(non_postprocessed_codes))
         global_dict["pointcloud"] = batch["pc_cond"]
@@ -700,15 +712,7 @@ class SPAR3D(BaseModule):
                             uv=uvs, material=material
                         ),
                     )
-                    rot = trimesh.transformations.rotation_matrix(
-                        np.radians(-90), [1, 0, 0]
-                    )
-                    tmesh.apply_transform(rot)
-                    tmesh.apply_transform(
-                        trimesh.transformations.rotation_matrix(
-                            np.radians(90), [0, 1, 0]
-                        )
-                    )
                     tmesh.invert()

         scene_codes, non_postprocessed_codes = self.get_scene_codes(batch)
+        # Create a rotation matrix for the final output domain
+        rotation = trimesh.transformations.rotation_matrix(np.radians(-90), [1, 0, 0])
+        rotation2 = trimesh.transformations.rotation_matrix(np.radians(90), [0, 1, 0])
+        output_rotation = rotation2 @ rotation
         global_dict = {}
         if self.image_estimator is not None:
             global_dict.update(
                 )
             )
         if self.global_estimator is not None and estimate_illumination:
+            rotation_torch = (
+                torch.tensor(output_rotation)
+                .to(self.device, dtype=torch.float32)[:3, :3]
+                .unsqueeze(0)
+            )
+            global_dict.update(
+                self.global_estimator(non_postprocessed_codes, rotation=rotation_torch)
+            )
         global_dict["pointcloud"] = batch["pc_cond"]
                             uv=uvs, material=material
                         ),
                     )
+                    tmesh.apply_transform(output_rotation)
                     tmesh.invert()