Spaces: Running on Zero
Working New 3d Model and Height Map
Files changed:
- app.py +103 -27
- trellis/renderers/gaussian_render.py +0 -1
- trellis/utils/render_utils.py +61 -2
- utils/depth_estimation.py +4 -2
- utils/image_utils.py +2 -1
app.py
CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
-import os
 import spaces
+import os
 import numpy as np
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *

@@ -16,7 +16,7 @@ from tempfile import NamedTemporaryFile
 import atexit
 import random
 #import accelerate
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, DPTImageProcessor, DPTForDepthEstimation
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils

@@ -100,6 +100,8 @@ from utils.version_info import (
     #release_torch_resources,
     #get_torch_info
 )
+#from utils.depth_estimation import (get_depth_map_from_state)
+
 
 
 input_image_palette = []

@@ -675,8 +677,61 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
 
     return gs, mesh, name
 
-@spaces.GPU(
-def
+@spaces.GPU()
+def depth_process_image(image_path, resized_width=800, z_scale=208):
+    """
+    Processes the input image to generate a depth map.
+
+    Args:
+        image_path (str): The file path to the input image.
+        resized_width (int, optional): The width to which the image is resized. Defaults to 800.
+        z_scale (int, optional): Z-axis scale factor. Defaults to 208.
+
+    Returns:
+        list: A list containing the depth image.
+    """
+    image_path = Path(image_path)
+    if not image_path.exists():
+        raise ValueError("Image file not found")
+
+    # Load and resize the image
+    image_raw = Image.open(image_path).convert("RGB")
+    print(f"Original size: {image_raw.size}")
+    resized_height = int(resized_width * image_raw.size[1] / image_raw.size[0])
+    image = image_raw.resize((resized_width, resized_height), Image.Resampling.LANCZOS)
+    print(f"Resized size: {image.size}")
+
+    # Prepare image for the model
+    encoding = image_processor(image, return_tensors="pt")
+
+    # Perform depth estimation
+    with torch.no_grad():
+        outputs = depth_model(**encoding)
+        predicted_depth = outputs.predicted_depth
+
+    # Interpolate depth to match the image size
+    prediction = torch.nn.functional.interpolate(
+        predicted_depth.unsqueeze(1),
+        size=(image.height, image.width),
+        mode="bicubic",
+        align_corners=False,
+    ).squeeze()
+
+    # Normalize the depth image to 8-bit
+    if torch.cuda.is_available():
+        prediction = prediction.numpy()
+    else:
+        prediction = prediction.cpu().numpy()
+    depth_min, depth_max = prediction.min(), prediction.max()
+    depth_image = ((prediction - depth_min) / (depth_max - depth_min) * 255).astype("uint8")
+    img = Image.fromarray(depth_image)
+
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()
+    return img
+
+def generate_3d_asset_part1(depth_image_source, randomize_seed, seed, input_image, output_image, overlay_image, bordered_image_output, progress=gr.Progress(track_tqdm=True)):
     # Choose the image based on source
     if depth_image_source == "Input Image":
         image_path = input_image

@@ -695,41 +750,45 @@ def generate_3d_asset(depth_image_source, randomize_seed, seed, input_image, out
 
     # Determine the final seed using default MAX_SEED from constants
     final_seed = np.random.randint(0, constants.MAX_SEED) if randomize_seed else seed
+    # Process the image for depth estimation
+    depth_img = depth_process_image(image_path, resized_width=1536, z_scale=332)
+    depth_img = resize_image_with_aspect_ratio(depth_img, 1536, 1536)
+
+    return depth_img, image_path, output_name, final_seed
 
+@spaces.GPU(duration=150,progress=gr.Progress(track_tqdm=True))
+def generate_3d_asset_part2(depth_img, image_path, output_name, seed, req: gr.Request, progress=gr.Progress(track_tqdm=True)):
     # Open image using standardized defaults
     image_raw = Image.open(image_path).convert("RGB")
-
+    resized_image = resize_image_with_aspect_ratio(image_raw, 1536, 1536)
+    depth_img = Image.open(depth_img).convert("RGBA")
     # Preprocess and run the Trellis pipeline with fixed sampler settings
-
-    # dict: The information of the generated 3D model.
-    # str: The path to the video of the 3D model.
-    processed_image = TRELLIS_PIPELINE.preprocess_image(image_raw, max_resolution=1536)
+    processed_image = TRELLIS_PIPELINE.preprocess_image(resized_image, max_resolution=1536)
     outputs = TRELLIS_PIPELINE.run(
         processed_image,
-        seed=
+        seed=seed,
         formats=["gaussian", "mesh"],
         preprocess_image=False,
         sparse_structure_sampler_params={
-            "steps":
+            "steps": 15,
             "cfg_strength": 7.5,
         },
         slat_sampler_params={
-            "steps":
+            "steps": 15,
            "cfg_strength": 3.0,
         },
     )
 
     # Validate the mesh
     mesh = outputs['mesh'][0]
-
-    if
+    meshisdict = isinstance(mesh, dict)
+    if meshisdict:
         vertices = mesh['vertices']
         faces = mesh['faces']
     else:
         vertices = mesh.vertices
         faces = mesh.faces
 
-    # Check mesh properties
     print(f"Mesh vertices: {vertices.shape}, faces: {faces.shape}")
     if faces.max() >= vertices.shape[0]:
         raise ValueError(f"Invalid mesh: face index {faces.max()} exceeds vertex count {vertices.shape[0]}")

@@ -738,23 +797,31 @@ def generate_3d_asset(depth_image_source, randomize_seed, seed, input_image, out
     if not vertices.is_cuda or not faces.is_cuda:
         raise ValueError("Mesh data must be on GPU")
     if vertices.dtype != torch.float32 or faces.dtype != torch.int32:
-
+        if meshisdict:
+            mesh['faces'] = faces.to(torch.int32)
+            mesh['vertices'] = vertices.to(torch.float32)
+        else:
+            mesh.faces = faces.to(torch.int32)
+            mesh.vertices = vertices.to(torch.float32)
 
-    # Save the video to a temporary file
     user_dir = os.path.join(constants.TMPDIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
 
-    video = render_utils.render_video(outputs['gaussian'][0], resolution=576, num_frames=
-
-    depth_snapshot = snapshot_results['depth'][0]
-    video_geo = render_utils.render_video(outputs['mesh'][0], resolution=576, num_frames=60, r=1)['normal']
+    video = render_utils.render_video(outputs['gaussian'][0], resolution=576, num_frames=64, r=1, fov=45)['color']
+    video_geo = render_utils.render_video(outputs['mesh'][0], resolution=576, num_frames=64, r=1, fov=45)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
     video_path = os.path.join(user_dir, f'{output_name}.mp4')
-    imageio.mimsave(video_path, video, fps=
+    imageio.mimsave(video_path, video, fps=8)
+
+    #snapshot_results = render_utils.render_snapshot_depth(outputs['mesh'][0], resolution=1280, r=1, fov=80)
+    #depth_snapshot = Image.fromarray(snapshot_results['normal'][0]).convert("L")
+    depth_snapshot = depth_img
+
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], output_name)
     torch.cuda.empty_cache()
     return [state, video_path, depth_snapshot]
 
+
 @spaces.GPU(duration=90,progress=gr.Progress(track_tqdm=True))
 def extract_glb(
     state: dict,

@@ -1028,7 +1095,7 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty',
             # Gallery from PRE_RENDERED_IMAGES GOES HERE
             prerendered_image_gallery = gr.Gallery(label="Image Gallery", show_label=True, value=build_prerendered_images_by_quality(3,'thumbnail'), elem_id="gallery", elem_classes="solid", type="filepath", columns=[3], rows=[3], preview=False ,object_fit="contain", height="auto", format="png",allow_preview=False)
         with gr.Column():
-            image_guidance_stength = gr.Slider(label="Image Guidance Strength (prompt percentage)", minimum=0, maximum=1.0, value=0.85, step=0.01, interactive=True)
+            image_guidance_stength = gr.Slider(label="Image Guidance Strength (prompt percentage)", minimum=0, maximum=1.0, value=0.85, step=0.01, interactive=True)
            replace_input_image_button = gr.Button(
                "Replace Input Image",
                elem_id="prerendered_replace_input_image_button",

@@ -1106,7 +1173,7 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty',
         with gr.Row():
             with gr.Column():
                 # Use standard seed settings only
-                seed_3d = gr.Slider(0, constants.MAX_SEED, label="Seed (3D Generation)", value=0, step=1)
+                seed_3d = gr.Slider(0, constants.MAX_SEED, label="Seed (3D Generation)", value=0, step=1, randomize=True)
                 randomize_seed_3d = gr.Checkbox(label="Randomize Seed (3D Generation)", value=True)
             with gr.Column():
                 depth_image_source = gr.Radio(

@@ -1116,11 +1183,11 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty',
                 )
         with gr.Row():
             generate_3d_asset_button = gr.Button("Generate 3D Asset", elem_classes="solid", variant="secondary")
+        with gr.Row():
+            depth_output = gr.Image(label="Depth Map", image_mode="L", elem_classes="centered solid imgcontainer", format="PNG", type="filepath", key="DepthOutput",interactive=False, show_download_button=True, show_fullscreen_button=True, show_share_button=True, height=400)
         with gr.Row():
             # For display: video output and 3D model preview (GLTF)
             video_output = gr.Video(label="3D Asset Video", autoplay=True, loop=True, height=400)
-        with gr.Row():
-            depth_output = gr.Image(label="Depth Map", image_mode="L", elem_classes="centered solid imgcontainer", format="PNG", type="filepath", key="DepthOutput",interactive=False, show_download_button=True, show_fullscreen_button=True, show_share_button=True)
         with gr.Accordion("GLB Extraction Settings", open=False):
             with gr.Row():
                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)

@@ -1134,6 +1201,8 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty',
             model_file = gr.File(label="3D GLTF", elem_classes="solid small centered")
         is_multiimage = gr.State(False)
         output_buf = gr.State()
+        ddd_image_path = gr.State("./images/images/Beeuty-1.png")
+        ddd_file_name = gr.State("Hexagon_file")
         with gr.Row():
             gr.Examples(examples=[
                 ["assets//examples//hex_map_p1.png", False, True, -32,-31,80,80,-1.8,0,35,0,1,"#FFD0D0", 15],

@@ -1245,8 +1314,13 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty',
 
     # Chain the buttons
     generate_3d_asset_button.click(
-        fn=
+        fn=generate_3d_asset_part1,
         inputs=[depth_image_source, randomize_seed_3d, seed_3d, input_image, output_image, overlay_image, bordered_image_output],
+        outputs=[depth_output, ddd_image_path, ddd_file_name, seed_3d ],
+        scroll_to_output=True
+    ).then(
+        fn=generate_3d_asset_part2,
+        inputs=[depth_output, ddd_image_path, ddd_file_name, seed_3d ],
         outputs=[output_buf, video_output, depth_output],
         scroll_to_output=True
     ).then(

@@ -1293,6 +1367,8 @@ if __name__ == "__main__":
 
     #-------------- ------------------------------------------------MODEL INITIALIZATION------------------------------------------------------------#
     # Load models once during module import
+    image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
+    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large", ignore_mismatched_sizes=True)
    TRELLIS_PIPELINE = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
    TRELLIS_PIPELINE.cuda()
    try:
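Note on the split above: generate_3d_asset is now two chained steps, so the quick depth-map pass and the heavy TRELLIS pass can run under separate @spaces.GPU budgets, with the intermediate results handed between them through output components. A minimal sketch of that pattern follows; the stub functions and component names are illustrative only, not the app's actual ones, and the spaces decorators only take effect inside a ZeroGPU Space. In the real diff the hand-off goes through the depth_output image (type="filepath"), the ddd_image_path / ddd_file_name states, and the seed_3d slider.

import gradio as gr
import spaces

@spaces.GPU()  # short default budget: the quick depth-map step
def part1_stub(seed):
    # stand-in for generate_3d_asset_part1: produce the intermediate artifacts
    return f"depth_{int(seed)}.png", int(seed)

@spaces.GPU(duration=150)  # longer budget: the heavy 3D-generation step
def part2_stub(depth_path, seed):
    # stand-in for generate_3d_asset_part2: consume what part 1 produced
    return f"asset built from {depth_path} with seed {seed}"

with gr.Blocks() as demo:
    seed = gr.Number(value=0, label="Seed")
    depth_path = gr.Textbox(label="Depth map path")
    seed_state = gr.State()
    result = gr.Textbox(label="Result")
    generate = gr.Button("Generate")
    # part 1 fills the components that part 2 then reads back as its inputs
    generate.click(
        fn=part1_stub, inputs=[seed], outputs=[depth_path, seed_state], scroll_to_output=True
    ).then(
        fn=part2_stub, inputs=[depth_path, seed_state], outputs=[result]
    )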
trellis/renderers/gaussian_render.py
CHANGED
@@ -11,7 +11,6 @@
 
 import torch
 import math
-from easydict import EasyDict as edict
 import numpy as np
 from ..representations.gaussian import Gaussian
 from .sh_utils import eval_sh
trellis/utils/render_utils.py
CHANGED
@@ -67,6 +67,53 @@ def render_frames(sample, extrinsics, intrinsics, options={}, colors_overwrite=N
     else:
         raise ValueError(f'Unsupported sample type: {type(sample)}')
 
+    rets = {}
+    for j, (extr, intr) in tqdm(enumerate(zip(extrinsics, intrinsics)), desc='Rendering', disable=not verbose):
+        if not isinstance(sample, MeshExtractResult):
+            res = renderer.render(sample, extr, intr, colors_overwrite=colors_overwrite)
+            if 'color' not in rets: rets['color'] = []
+            # if 'depth' not in rets: rets['depth'] = []
+            rets['color'].append(np.clip(res['color'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8))
+            # if 'percent_depth' in res:
+            #     rets['depth'].append(res['percent_depth'].detach().cpu().numpy())
+            # elif 'depth' in res:
+            #     rets['depth'].append(res['depth'].detach().cpu().numpy())
+            # else:
+            #     rets['depth'].append(None)
+        else:
+            res = renderer.render(sample, extr, intr)
+            if 'normal' not in rets: rets['normal'] = []
+            rets['normal'].append(np.clip(res['normal'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8))
+
+    return rets
+
+def render_frames_depth(sample, extrinsics, intrinsics, options={}, colors_overwrite=None, verbose=True, **kwargs):
+    if isinstance(sample, Octree):
+        renderer = OctreeRenderer()
+        renderer.rendering_options.resolution = options.get('resolution', 512)
+        renderer.rendering_options.near = options.get('near', 0.8)
+        renderer.rendering_options.far = options.get('far', 1.6)
+        renderer.rendering_options.bg_color = options.get('bg_color', (0, 0, 0))
+        renderer.rendering_options.ssaa = options.get('ssaa', 4)
+        renderer.pipe.primitive = sample.primitive
+    elif isinstance(sample, Gaussian):
+        renderer = GaussianRenderer()
+        renderer.rendering_options.resolution = options.get('resolution', 512)
+        renderer.rendering_options.near = options.get('near', 0.8)
+        renderer.rendering_options.far = options.get('far', 1.6)
+        renderer.rendering_options.bg_color = options.get('bg_color', (0, 0, 0))
+        renderer.rendering_options.ssaa = options.get('ssaa', 1)
+        renderer.pipe.kernel_size = kwargs.get('kernel_size', 0.1)
+        renderer.pipe.use_mip_gaussian = True
+    elif isinstance(sample, MeshExtractResult):
+        renderer = MeshRenderer()
+        renderer.rendering_options.resolution = options.get('resolution', 512)
+        renderer.rendering_options.near = options.get('near', 1)
+        renderer.rendering_options.far = options.get('far', 100)
+        renderer.rendering_options.ssaa = options.get('ssaa', 4)
+    else:
+        raise ValueError(f'Unsupported sample type: {type(sample)}')
+
     rets = {}
     for j, (extr, intr) in tqdm(enumerate(zip(extrinsics, intrinsics)), desc='Rendering', disable=not verbose):
         if not isinstance(sample, MeshExtractResult):

@@ -82,11 +129,15 @@ def render_frames(sample, extrinsics, intrinsics, options={}, colors_overwrite=N
                 rets['depth'].append(None)
         else:
             res = renderer.render(sample, extr, intr)
+            if 'depth' not in rets: rets['depth'] = []
             if 'normal' not in rets: rets['normal'] = []
             rets['normal'].append(np.clip(res['normal'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8))
+            if 'depth' in res:
+                rets['depth'].append(np.clip(res['depth'].detach().cpu().numpy(), 0, 255).astype(np.uint8))
+            else:
+                rets['depth'].append(None)
     return rets
 
-
 def render_video(sample, resolution=512, bg_color=(0, 0, 0), num_frames=300, r=2, fov=40, **kwargs):
     yaws = torch.linspace(0, 2 * 3.1415, num_frames)
     pitch = 0.25 + 0.5 * torch.sin(torch.linspace(0, 2 * 3.1415, num_frames))

@@ -107,10 +158,18 @@ def render_multiview(sample, resolution=512, nviews=30):
     return res['color'], extrinsics, intrinsics
 
 
-def render_snapshot(samples, resolution=512, bg_color=(0, 0, 0), offset=(-16 / 180 * np.pi, 20 / 180 * np.pi), r=
+def render_snapshot(samples, resolution=512, bg_color=(0, 0, 0), offset=(-16 / 180 * np.pi, 20 / 180 * np.pi), r=2, fov=60, **kwargs):
     yaw = [0, np.pi/2, np.pi, 3*np.pi/2]
     yaw_offset = offset[0]
     yaw = [y + yaw_offset for y in yaw]
     pitch = [offset[1] for _ in range(4)]
     extrinsics, intrinsics = yaw_pitch_r_fov_to_extrinsics_intrinsics(yaw, pitch, r, fov)
     return render_frames(samples, extrinsics, intrinsics, {'resolution': resolution, 'bg_color': bg_color}, **kwargs)
+
+def render_snapshot_depth(samples, resolution=512, bg_color=(0, 0, 0), offset=(0, np.pi/2), r=2, fov=90, **kwargs):
+    yaw = [0, np.pi/2, np.pi, 3*np.pi/2]
+    yaw_offset = offset[0]
+    yaw = [y + yaw_offset for y in yaw]
+    pitch = [offset[1] for _ in range(4)]
+    extrinsics, intrinsics = yaw_pitch_r_fov_to_extrinsics_intrinsics(yaw, pitch, r, fov)
+    return render_frames_depth(samples, extrinsics, intrinsics, {'resolution': resolution, 'bg_color': bg_color}, **kwargs)
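The new render_frames_depth / render_snapshot_depth pair mirrors render_frames / render_snapshot but also collects a 'depth' channel for mesh samples (the Gaussian depth collection stays commented out). A small usage sketch, assuming a MeshExtractResult already on the GPU (for example outputs['mesh'][0] from the pipeline in app.py) and reusing the resolution/r/fov values from the commented-out call in app.py:

import numpy as np
from PIL import Image
from trellis.utils import render_utils

def save_depth_snapshot(mesh_result, path="depth_snapshot.png"):
    # Render four axis-aligned snapshots and keep the first view's depth buffer.
    snap = render_utils.render_snapshot_depth(mesh_result, resolution=1280, r=1, fov=80)
    depth = snap['depth'][0]  # one uint8 depth array per view, or None if the renderer gave no depth
    if depth is None:
        raise ValueError("Renderer returned no depth for this sample")
    Image.fromarray(np.squeeze(depth)).convert("L").save(path)
    return path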
utils/depth_estimation.py
CHANGED
@@ -12,6 +12,8 @@ from utils.image_utils import (
     resize_image_with_aspect_ratio
 )
 from utils.constants import TMPDIR
+from easydict import EasyDict as edict
+
 
 # Load models once during module import
 image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")

@@ -258,10 +260,10 @@ def depth_process_image(image_path, resized_width=800, z_scale=208):
         torch.cuda.ipc_collect()
     return [img, gltf_path, gltf_path]
 
-def get_depth_map_from_state(state):
+def get_depth_map_from_state(state, image_height=1024, image_width=1024):
     from diff_gaussian_rasterization import GaussianRasterizer, GaussianRasterizationSettings
 
-    settings = GaussianRasterizationSettings(image_height=
+    settings = GaussianRasterizationSettings(image_height=image_height, image_width=image_width, kernel_size=0.01,bg=(0.0, 0.0, 0.0))
     rasterizer = GaussianRasterizer(settings)
     # Assume state has necessary data like means3D, scales, etc.
     rendered_image, rendered_depth, _, _, _, _ = rasterizer(means3D=state["means3D"], means2D=state["means2D"], shs=state["shs"], colors_precomp=state["colors_precomp"], opacities=state["opacities"], scales=state["scales"], rotations=state["rotations"], cov3D_precomp=state["cov3D_precomp"])
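get_depth_map_from_state now takes explicit image_height / image_width instead of a hard-coded rasterization size. A hypothetical call site, assuming the state dict already carries the rasterizer inputs the function unpacks (means3D, means2D, shs, colors_precomp, opacities, scales, rotations, cov3D_precomp) and that its return value is consumed as before (the diff shows only the rasterizer call):

from PIL import Image
from utils.depth_estimation import get_depth_map_from_state

def depth_matching_image(state, image_path):
    # Size the Gaussian rasterization to the source image rather than a fixed 1024x1024.
    img = Image.open(image_path)
    return get_depth_map_from_state(state, image_height=img.height, image_width=img.width)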
utils/image_utils.py
CHANGED
@@ -276,7 +276,7 @@ def resize_image_with_aspect_ratio(image, target_width, target_height):
     original_width, original_height = image.size
     target_aspect = target_width / target_height
     original_aspect = original_width / original_height
-
+    #print(f"Original size: {image.size}\ntarget_aspect: {target_aspect}\noriginal_aspect: {original_aspect}\n")
     # Decide whether to fit width or height
     if original_aspect > target_aspect:
         # Image is wider than target aspect ratio

@@ -289,6 +289,7 @@ def resize_image_with_aspect_ratio(image, target_width, target_height):
 
     # Resize the image
     resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+    #print(f"Resized size: {resized_image.size}\n")
 
     # Create a new image with target dimensions and black background
     new_image = Image.new("RGB", (target_width, target_height), (0, 0, 0))
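Apart from the commented-out prints, resize_image_with_aspect_ratio is what app.py now uses to letterbox both the depth map and the raw input onto a square black canvas before TRELLIS preprocessing. A short usage sketch, assuming the function returns the padded RGB image (only part of its body appears in this diff) and using the example image referenced earlier in the diff:

from PIL import Image
from utils.image_utils import resize_image_with_aspect_ratio

# Pad a non-square image onto a 1536x1536 black canvas, as generate_3d_asset_part1/part2 do.
src = Image.open("assets/examples/hex_map_p1.png").convert("RGB")
padded = resize_image_with_aspect_ratio(src, 1536, 1536)
padded.save("hex_map_p1_1536.png")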