Spaces:
Running
on
Zero
Running
on
Zero
aknapitsch user
committed on
Commit
·
c226bc0
1
Parent(s):
eb74057
pointcloud as mesh
Browse files- app.py +25 -7
- mapanything/utils/hf_utils/moge_utils.py +639 -0
- mapanything/utils/hf_utils/visual_util.py +269 -4
app.py
CHANGED
|
@@ -22,12 +22,12 @@ sys.path.append("mapanything/")
|
|
| 22 |
|
| 23 |
from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals
|
| 24 |
from mapanything.utils.hf_utils.css_and_html import (
|
|
|
|
|
|
|
| 25 |
get_acknowledgements_html,
|
| 26 |
get_description_html,
|
| 27 |
get_gradio_theme,
|
| 28 |
get_header_html,
|
| 29 |
-
GRADIO_CSS,
|
| 30 |
-
MEASURE_INSTRUCTIONS_HTML,
|
| 31 |
)
|
| 32 |
from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
|
| 33 |
from mapanything.utils.hf_utils.visual_util import predictions_to_glb
|
|
@@ -37,7 +37,7 @@ from mapanything.utils.image import load_images, rgb
|
|
| 37 |
def get_logo_base64():
|
| 38 |
"""Convert WAI logo to base64 for embedding in HTML"""
|
| 39 |
import base64
|
| 40 |
-
|
| 41 |
logo_path = "examples/wai_logo/wai_logo.png"
|
| 42 |
try:
|
| 43 |
with open(logo_path, "rb") as img_file:
|
|
@@ -506,9 +506,7 @@ def gradio_demo(
|
|
| 506 |
|
| 507 |
print("Running MapAnything model...")
|
| 508 |
with torch.no_grad():
|
| 509 |
-
predictions, processed_data = run_model(
|
| 510 |
-
target_dir, apply_mask, mask_edges
|
| 511 |
-
)
|
| 512 |
|
| 513 |
# Save predictions
|
| 514 |
prediction_save_path = os.path.join(target_dir, "predictions.npz")
|
|
@@ -534,6 +532,7 @@ def gradio_demo(
|
|
| 534 |
mask_sky=filter_sky,
|
| 535 |
mask_black_bg=filter_black_bg,
|
| 536 |
mask_white_bg=filter_white_bg,
|
|
|
|
| 537 |
)
|
| 538 |
glbscene.export(file_obj=glbfile)
|
| 539 |
|
|
@@ -876,6 +875,7 @@ def update_visualization(
|
|
| 876 |
filter_sky=False,
|
| 877 |
filter_black_bg=False,
|
| 878 |
filter_white_bg=False,
|
|
|
|
| 879 |
):
|
| 880 |
"""
|
| 881 |
Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
|
|
@@ -923,6 +923,7 @@ def update_visualization(
|
|
| 923 |
mask_sky=filter_sky,
|
| 924 |
mask_black_bg=filter_black_bg,
|
| 925 |
mask_white_bg=filter_white_bg,
|
|
|
|
| 926 |
)
|
| 927 |
glbscene.export(file_obj=glbfile)
|
| 928 |
|
|
@@ -1145,6 +1146,7 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1145 |
with gr.Column():
|
| 1146 |
gr.Markdown("### Pointcloud options (live updates)")
|
| 1147 |
show_cam = gr.Checkbox(label="Show Camera", value=True)
|
|
|
|
| 1148 |
filter_sky = gr.Checkbox(
|
| 1149 |
label="Filter Sky (using skyseg.onnx)", value=False
|
| 1150 |
)
|
|
@@ -1160,7 +1162,7 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1160 |
)
|
| 1161 |
mask_edges_checkbox = apply_mask_checkbox
|
| 1162 |
# ---------------------- Example Scenes Section ----------------------
|
| 1163 |
-
gr.Markdown("## Example Scenes")
|
| 1164 |
gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
|
| 1165 |
|
| 1166 |
# Get scene information
|
|
@@ -1305,6 +1307,22 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1305 |
filter_sky,
|
| 1306 |
filter_black_bg,
|
| 1307 |
filter_white_bg,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1308 |
],
|
| 1309 |
[reconstruction_output, log_output],
|
| 1310 |
)
|
|
|
|
| 22 |
|
| 23 |
from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals
|
| 24 |
from mapanything.utils.hf_utils.css_and_html import (
|
| 25 |
+
GRADIO_CSS,
|
| 26 |
+
MEASURE_INSTRUCTIONS_HTML,
|
| 27 |
get_acknowledgements_html,
|
| 28 |
get_description_html,
|
| 29 |
get_gradio_theme,
|
| 30 |
get_header_html,
|
|
|
|
|
|
|
| 31 |
)
|
| 32 |
from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
|
| 33 |
from mapanything.utils.hf_utils.visual_util import predictions_to_glb
|
|
|
|
| 37 |
def get_logo_base64():
|
| 38 |
"""Convert WAI logo to base64 for embedding in HTML"""
|
| 39 |
import base64
|
| 40 |
+
|
| 41 |
logo_path = "examples/wai_logo/wai_logo.png"
|
| 42 |
try:
|
| 43 |
with open(logo_path, "rb") as img_file:
|
|
|
|
| 506 |
|
| 507 |
print("Running MapAnything model...")
|
| 508 |
with torch.no_grad():
|
| 509 |
+
predictions, processed_data = run_model(target_dir, apply_mask, mask_edges)
|
|
|
|
|
|
|
| 510 |
|
| 511 |
# Save predictions
|
| 512 |
prediction_save_path = os.path.join(target_dir, "predictions.npz")
|
|
|
|
| 532 |
mask_sky=filter_sky,
|
| 533 |
mask_black_bg=filter_black_bg,
|
| 534 |
mask_white_bg=filter_white_bg,
|
| 535 |
+
as_mesh=True, # Default to True for reconstruction
|
| 536 |
)
|
| 537 |
glbscene.export(file_obj=glbfile)
|
| 538 |
|
|
|
|
| 875 |
filter_sky=False,
|
| 876 |
filter_black_bg=False,
|
| 877 |
filter_white_bg=False,
|
| 878 |
+
show_mesh=True,
|
| 879 |
):
|
| 880 |
"""
|
| 881 |
Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
|
|
|
|
| 923 |
mask_sky=filter_sky,
|
| 924 |
mask_black_bg=filter_black_bg,
|
| 925 |
mask_white_bg=filter_white_bg,
|
| 926 |
+
as_mesh=show_mesh,
|
| 927 |
)
|
| 928 |
glbscene.export(file_obj=glbfile)
|
| 929 |
|
|
|
|
| 1146 |
with gr.Column():
|
| 1147 |
gr.Markdown("### Pointcloud options (live updates)")
|
| 1148 |
show_cam = gr.Checkbox(label="Show Camera", value=True)
|
| 1149 |
+
show_mesh = gr.Checkbox(label="Show mesh", value=True)
|
| 1150 |
filter_sky = gr.Checkbox(
|
| 1151 |
label="Filter Sky (using skyseg.onnx)", value=False
|
| 1152 |
)
|
|
|
|
| 1162 |
)
|
| 1163 |
mask_edges_checkbox = apply_mask_checkbox
|
| 1164 |
# ---------------------- Example Scenes Section ----------------------
|
| 1165 |
+
gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
|
| 1166 |
gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
|
| 1167 |
|
| 1168 |
# Get scene information
|
|
|
|
| 1307 |
filter_sky,
|
| 1308 |
filter_black_bg,
|
| 1309 |
filter_white_bg,
|
| 1310 |
+
show_mesh,
|
| 1311 |
+
],
|
| 1312 |
+
[reconstruction_output, log_output],
|
| 1313 |
+
)
|
| 1314 |
+
|
| 1315 |
+
show_mesh.change(
|
| 1316 |
+
update_visualization,
|
| 1317 |
+
[
|
| 1318 |
+
target_dir_output,
|
| 1319 |
+
frame_filter,
|
| 1320 |
+
show_cam,
|
| 1321 |
+
is_example,
|
| 1322 |
+
filter_sky,
|
| 1323 |
+
filter_black_bg,
|
| 1324 |
+
filter_white_bg,
|
| 1325 |
+
show_mesh,
|
| 1326 |
],
|
| 1327 |
[reconstruction_output, log_output],
|
| 1328 |
)
|
mapanything/utils/hf_utils/moge_utils.py
ADDED
|
@@ -0,0 +1,639 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from typing import *
|
| 3 |
+
from numbers import Number
|
| 4 |
+
import warnings
|
| 5 |
+
import functools
|
| 6 |
+
|
| 7 |
+
from ._helpers import batched
|
| 8 |
+
from . import transforms
|
| 9 |
+
from . import mesh
|
| 10 |
+
|
| 11 |
+
# Names exported by `from <module> import *`.
# The original list contained 'points_to_normals' twice and omitted
# 'depth_to_normals', which is defined in this module.
# NOTE(review): the shape helpers ('chessboard', 'cube', 'icosahedron',
# 'square', 'camera_frustum', 'to4x4') are presumably defined further down
# the file — confirm they exist, otherwise a star import will fail.
__all__ = [
    'sliding_window_1d',
    'sliding_window_nd',
    'sliding_window_2d',
    'max_pool_1d',
    'max_pool_2d',
    'max_pool_nd',
    'depth_edge',
    'normals_edge',
    'depth_aliasing',
    'interpolate',
    'image_scrcoord',
    'image_uv',
    'image_pixel_center',
    'image_pixel',
    'image_mesh',
    'image_mesh_from_depth',
    'depth_to_normals',
    'points_to_normals',
    'chessboard',
    'cube',
    'icosahedron',
    'square',
    'camera_frustum',
    'to4x4',
]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def no_runtime_warnings(fn):
    """
    Decorator that silences warnings raised while `fn` runs.

    The wrapped call executes inside `warnings.catch_warnings()` with the
    filter set to "ignore", so every warning category is suppressed (not only
    RuntimeWarning). The previous warning filters are restored when the call
    returns or raises.

    Args:
        fn (callable): function to wrap

    Returns:
        (callable): wrapper with the same metadata as `fn` (via functools.wraps)
    """
    @functools.wraps(fn)
    def _silenced(*call_args, **call_kwargs):
        guard = warnings.catch_warnings()
        with guard:
            warnings.simplefilter("ignore")
            result = fn(*call_args, **call_kwargs)
        return result

    return _silenced
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def sliding_window_1d(x: np.ndarray, window_size: int, stride: int, axis: int = -1):
    """
    Return a view of the input array with a sliding window of the given size and stride.
    The sliding window is performed over the given axis, and the window dimension is appended
    to the end of the output array's shape.

    The result is built with `np.lib.stride_tricks.as_strided`, so it shares memory with `x`
    and overlapping windows alias the same elements.

    Args:
        x (np.ndarray): input array with shape (..., axis_size, ...)
        window_size (int): size of the sliding window
        stride (int): step between the starts of consecutive windows
        axis (int): axis to perform sliding window over

    Returns:
        x_sliding (np.ndarray): view of the input array with shape (..., n_windows, ..., window_size),
            where n_windows = (axis_size - window_size) // stride + 1
    """
    assert x.shape[axis] >= window_size, f"window_size ({window_size}) is larger than axis_size ({x.shape[axis]})"
    axis = axis % x.ndim
    # Windows start at 0, stride, 2 * stride, ... for as long as the whole window fits.
    # The previous formula `(axis_size - window_size + 1) // stride` dropped valid windows
    # whenever stride > 1 (e.g. axis_size=4, window_size=2, stride=2 gave 1 instead of 2).
    n_windows = (x.shape[axis] - window_size) // stride + 1
    shape = (*x.shape[:axis], n_windows, *x.shape[axis + 1:], window_size)
    strides = (*x.strides[:axis], stride * x.strides[axis], *x.strides[axis + 1:], x.strides[axis])
    x_sliding = np.lib.stride_tricks.as_strided(x, shape=shape, strides=strides)
    return x_sliding
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def sliding_window_nd(x: np.ndarray, window_size: Tuple[int,...], stride: Tuple[int,...], axis: Tuple[int,...]) -> np.ndarray:
    """
    Apply `sliding_window_1d` successively over several axes.

    Axes are normalised against the input's rank once, up front. Each 1-D pass
    appends its window dimension at the end, so the normalised indices stay
    valid for the later passes.

    Args:
        x (np.ndarray): input array
        window_size (Tuple[int, ...]): window size for each entry of `axis`
        stride (Tuple[int, ...]): stride for each entry of `axis`
        axis (Tuple[int, ...]): axes to slide over

    Returns:
        (np.ndarray): view with one trailing window dimension per entry of `axis`,
            in the same order as `axis`
    """
    rank = x.ndim
    resolved_axes = [ax % rank for ax in axis]
    for idx, ax in enumerate(resolved_axes):
        x = sliding_window_1d(x, window_size[idx], stride[idx], ax)
    return x
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def sliding_window_2d(x: np.ndarray, window_size: Union[int, Tuple[int, int]], stride: Union[int, Tuple[int, int]], axis: Tuple[int, int] = (-2, -1)) -> np.ndarray:
    """
    Two-axis sliding window; a thin wrapper around `sliding_window_nd`.

    Args:
        x (np.ndarray): input array
        window_size (int or (int, int)): window size; an int is used for both axes
        stride (int or (int, int)): stride; an int is used for both axes
        axis ((int, int)): the two axes to slide over. Defaults to the last two.

    Returns:
        (np.ndarray): view of `x` with two trailing window dimensions
    """
    # Broadcast scalar arguments to one value per axis.
    window_size = (window_size, window_size) if isinstance(window_size, int) else window_size
    stride = (stride, stride) if isinstance(stride, int) else stride
    return sliding_window_nd(x, window_size, stride, axis)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def max_pool_1d(x: np.ndarray, kernel_size: int, stride: int, padding: int = 0, axis: int = -1):
    """
    Max pooling along a single axis.

    Args:
        x (np.ndarray): input array
        kernel_size (int): pooling window size
        stride (int): step between consecutive windows
        padding (int): number of padding entries added on both sides of `axis`
        axis (int): axis to pool over

    Returns:
        (np.ndarray): pooled array; the pooled axis keeps its position in the shape
    """
    axis = axis % x.ndim
    if padding > 0:
        # Pad with a value that never wins the maximum: NaN for floats
        # (skipped by nanmax below), the dtype minimum for integer dtypes.
        if x.dtype.kind == 'f':
            pad_value = np.nan
        else:
            pad_value = np.iinfo(x.dtype).min
        border_shape = (*x.shape[:axis], padding, *x.shape[axis + 1:])
        border = np.full(border_shape, fill_value=pad_value, dtype=x.dtype)
        x = np.concatenate([border, x, border], axis=axis)
    windows = sliding_window_1d(x, kernel_size, stride, axis)
    # The window dimension is the trailing one; reduce it away.
    return np.nanmax(windows, axis=-1)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def max_pool_nd(x: np.ndarray, kernel_size: Tuple[int,...], stride: Tuple[int,...], padding: Tuple[int,...], axis: Tuple[int,...]) -> np.ndarray:
    """
    Max pooling over several axes, done as one `max_pool_1d` pass per axis.

    Args:
        x (np.ndarray): input array
        kernel_size (Tuple[int, ...]): window size for each entry of `axis`
        stride (Tuple[int, ...]): stride for each entry of `axis`
        padding (Tuple[int, ...]): padding for each entry of `axis`
        axis (Tuple[int, ...]): axes to pool over

    Returns:
        (np.ndarray): pooled array
    """
    for idx, ax in enumerate(axis):
        x = max_pool_1d(x, kernel_size[idx], stride[idx], padding[idx], ax)
    return x
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def max_pool_2d(x: np.ndarray, kernel_size: Union[int, Tuple[int, int]], stride: Union[int, Tuple[int, int]], padding: Union[int, Tuple[int, int]], axis: Tuple[int, int] = (-2, -1)):
    """
    Max pooling over two axes; a thin wrapper around `max_pool_nd`.

    Args:
        x (np.ndarray): input array
        kernel_size (number or pair): window size; a single number is used for both axes
        stride (number or pair): stride; a single number is used for both axes
        padding (number or pair): padding; a single number is used for both axes
        axis ((int, int)): the two axes to pool over. Defaults to the last two.

    Returns:
        (np.ndarray): pooled array
    """
    def _as_pair(value):
        # A single number applies to both axes; anything else is passed through.
        return (value, value) if isinstance(value, Number) else value

    return max_pool_nd(x, _as_pair(kernel_size), _as_pair(stride), _as_pair(padding), tuple(axis))
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
@no_runtime_warnings
def depth_edge(depth: np.ndarray, atol: float = None, rtol: float = None, kernel_size: int = 3, mask: np.ndarray = None) -> np.ndarray:
    """
    Compute the edge mask of a depth map. A pixel is an edge when the spread
    (max - min) of depth inside its neighbourhood is large.

    Args:
        depth (np.ndarray): shape (..., height, width), linear depth map
        atol (float): absolute tolerance on the neighbourhood spread
        rtol (float): relative tolerance on the neighbourhood spread divided by the pixel's depth
        kernel_size (int): side length of the square neighbourhood
        mask (np.ndarray, optional): boolean mask; pixels where it is False are excluded
            from the neighbourhood max/min. Defaults to None.

    Returns:
        edge (np.ndarray): shape (..., height, width), dtype bool
    """
    half = kernel_size // 2
    if mask is None:
        pos, neg = depth, -depth
    else:
        # -inf can never be the maximum, so masked-out pixels are ignored by the pooling.
        pos = np.where(mask, depth, -np.inf)
        neg = np.where(mask, -depth, -np.inf)

    # max(depth) + max(-depth) == max(depth) - min(depth) over the window.
    local_max = max_pool_2d(pos, kernel_size, stride=1, padding=half)
    neg_local_min = max_pool_2d(neg, kernel_size, stride=1, padding=half)
    diff = local_max + neg_local_min

    edge = np.zeros_like(depth, dtype=bool)
    if atol is not None:
        edge |= diff > atol
    # Division warnings (e.g. zero depth) are already silenced by the decorator.
    if rtol is not None:
        edge |= diff / depth > rtol
    return edge
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
@no_runtime_warnings
def depth_aliasing(depth: np.ndarray, atol: float = None, rtol: float = None, kernel_size: int = 3, mask: np.ndarray = None) -> np.ndarray:
    """
    Compute a map that flags aliased pixels of a depth map. A pixel is aliased
    when it is close to neither the maximum nor the minimum of its neighbourhood.

    Args:
        depth (np.ndarray): shape (..., height, width), linear depth map
        atol (float): absolute tolerance
        rtol (float): relative tolerance (distance divided by the pixel's depth)
        kernel_size (int): side length of the square neighbourhood
        mask (np.ndarray, optional): boolean mask; pixels where it is False are excluded
            from the neighbourhood max/min. Defaults to None.

    Returns:
        edge (np.ndarray): shape (..., height, width), dtype bool
    """
    half = kernel_size // 2
    if mask is None:
        pos, neg = depth, -depth
    else:
        # -inf can never be the maximum, so masked-out pixels are ignored by the pooling.
        pos = np.where(mask, depth, -np.inf)
        neg = np.where(mask, -depth, -np.inf)

    # Distance of each pixel to the neighbourhood maximum and minimum.
    dist_to_max = max_pool_2d(pos, kernel_size, stride=1, padding=half) - depth
    dist_to_min = max_pool_2d(neg, kernel_size, stride=1, padding=half) + depth
    diff = np.minimum(dist_to_max, dist_to_min)

    edge = np.zeros_like(depth, dtype=bool)
    if atol is not None:
        edge |= diff > atol
    if rtol is not None:
        edge |= diff / depth > rtol
    return edge
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
@no_runtime_warnings
def normals_edge(normals: np.ndarray, tol: float, kernel_size: int = 3, mask: np.ndarray = None) -> np.ndarray:
    """
    Compute the edge mask from a normal map. A pixel is an edge when the angle
    between its normal and some normal in its neighbourhood exceeds `tol`.

    Args:
        normals (np.ndarray): shape (..., height, width, 3), normal map
        tol (float): tolerance in degrees
        kernel_size (int): side length of the square neighbourhood
            (presumably expected to be odd so the padded windows line up with
            the input — TODO confirm)
        mask (np.ndarray, optional): shape (..., height, width), dtype bool;
            neighbours where it is False are ignored. Defaults to None.

    Returns:
        edge (np.ndarray): shape (..., height, width), dtype bool
    """
    assert normals.ndim >= 3 and normals.shape[-1] == 3, "normal should be of shape (..., height, width, 3)"
    normals = normals / (np.linalg.norm(normals, axis=-1, keepdims=True) + 1e-12)

    padding = kernel_size // 2
    # (..., H, W, 3, kh, kw): for every pixel, the normals of its neighbourhood.
    normals_window = sliding_window_2d(
        np.pad(normals, (*([(0, 0)] * (normals.ndim - 3)), (padding, padding), (padding, padding), (0, 0)), mode='edge'),
        window_size=kernel_size,
        stride=1,
        axis=(-3, -2)
    )
    # Clip the dot product so rounding error cannot push it outside arccos' domain.
    cos_angle = np.clip((normals[..., None, None] * normals_window).sum(axis=-3), -1.0, 1.0)
    # np.arccos instead of np.acos: the latter only exists in NumPy >= 2.0.
    angle = np.arccos(cos_angle)
    if mask is not None:
        # The mask has no channel axis, so its spatial axes are the last two.
        # The original padded/windowed it with the (-3, -2) layout of `normals`,
        # which transposed the windows for 2-D masks and failed for batched ones.
        mask_window = sliding_window_2d(
            np.pad(mask, (*([(0, 0)] * (mask.ndim - 2)), (padding, padding), (padding, padding)), mode='edge'),
            window_size=kernel_size,
            stride=1,
            axis=(-2, -1)
        )
        angle = np.where(mask_window, angle, 0)
    angle_diff = angle.max(axis=(-2, -1))

    angle_diff = max_pool_2d(angle_diff, kernel_size, stride=1, padding=kernel_size // 2)
    edge = angle_diff > np.deg2rad(tol)
    return edge
|
| 209 |
+
|
| 210 |
+
@no_runtime_warnings
def points_to_normals(point: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
    """
    Calculate a normal map from a point map. Value range is [-1, 1]. Normal direction in OpenGL identity camera's coordinate system.

    Each pixel's normal is the normalised sum of the unit normals of the (up to
    four) triangles formed with its up/left/down/right neighbours. A triangle
    only contributes when the pixel and both neighbours are valid; pixels
    outside the image count as invalid.

    Args:
        point (np.ndarray): shape (height, width, 3), point map
        mask (np.ndarray, optional): shape (height, width), dtype bool, valid pixels. Defaults to None.

    Returns:
        normal (np.ndarray): shape (height, width, 3), normal map.
        normal_mask (np.ndarray): shape (height, width), dtype bool. Only returned
            (as the second element of a tuple) when `mask` is given; pixels with no
            contributing triangle are False and their normal is set to 0.
    """
    rows, cols = point.shape[-3:-1]
    mask_given = mask is not None
    if not mask_given:
        mask = np.ones_like(point[..., 0], dtype=bool)

    # Zero-pad points and mask by one pixel so border pixels have four neighbours.
    padded_mask = np.zeros((rows + 2, cols + 2), dtype=bool)
    padded_mask[1:-1, 1:-1] = mask
    padded_pts = np.zeros((rows + 2, cols + 2, 3), dtype=point.dtype)
    padded_pts[1:-1, 1:-1, :] = point

    centre = (slice(1, -1), slice(1, -1))
    neighbours = (
        (slice(None, -2), slice(1, -1)),   # up
        (slice(1, -1), slice(None, -2)),   # left
        (slice(2, None), slice(1, -1)),    # down
        (slice(1, -1), slice(2, None)),    # right
    )
    offsets = [padded_pts[sel] - padded_pts[centre] for sel in neighbours]
    neighbour_ok = [padded_mask[sel] for sel in neighbours]

    # Consecutive neighbour pairs: (up, left), (left, down), (down, right), (right, up).
    pairs = ((0, 1), (1, 2), (2, 3), (3, 0))
    normal = np.stack([np.cross(offsets[a], offsets[b], axis=-1) for a, b in pairs])
    normal = normal / (np.linalg.norm(normal, axis=-1, keepdims=True) + 1e-12)
    valid = np.stack([neighbour_ok[a] & neighbour_ok[b] for a, b in pairs]) & padded_mask[None, 1:-1, 1:-1]

    normal = (normal * valid[..., None]).sum(axis=0)
    normal = normal / (np.linalg.norm(normal, axis=-1, keepdims=True) + 1e-12)

    if not mask_given:
        return normal
    normal_mask = valid.any(axis=0)
    normal = np.where(normal_mask[..., None], normal, 0)
    return normal, normal_mask
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def depth_to_normals(depth: np.ndarray, intrinsics: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
    """
    Calculate a normal map from a depth map. Value range is [-1, 1]. Normal direction in OpenGL identity camera's coordinate system.

    The depth map is unprojected with `transforms.unproject_cv` and the
    resulting point map is handed to `points_to_normals`.

    Args:
        depth (np.ndarray): shape (height, width), linear depth map
        intrinsics (np.ndarray): shape (3, 3), intrinsics matrix
        mask (np.ndarray, optional): shape (height, width), dtype bool, valid pixels.
            Defaults to None, which is treated as all pixels valid.

    Returns:
        Whatever `points_to_normals` returns for a non-None mask: a tuple
        (normal, normal_mask) with normal of shape (height, width, 3).
    """
    rows, cols = depth.shape[-2:]
    # NOTE(review): a missing mask is replaced by an all-True one *before* the call
    # below, so points_to_normals always takes its "mask given" branch and this
    # function returns (normal, normal_mask) even when the caller passed no mask.
    # Confirm whether callers rely on that before changing it.
    if mask is None:
        mask = np.ones_like(depth, dtype=bool)

    pixel_uv = image_uv(width=cols, height=rows, dtype=np.float32)
    camera_pts = transforms.unproject_cv(pixel_uv, depth, intrinsics=intrinsics, extrinsics=None)

    return points_to_normals(camera_pts, mask)
|
| 279 |
+
|
| 280 |
+
def interpolate(bary: np.ndarray, tri_id: np.ndarray, attr: np.ndarray, faces: np.ndarray) -> np.ndarray:
    """Interpolate vertex attributes with given barycentric coordinates and triangle indices

    A zero "sentinel" vertex and a sentinel face referencing it are prepended, and
    every index is shifted by one, so a triangle index of -1 yields an all-zero result.

    Args:
        bary (np.ndarray): shape (..., 3), barycentric coordinates
        tri_id (np.ndarray): int array of shape (...), triangle indices
        attr (np.ndarray): shape (N, M), vertices attributes
        faces (np.ndarray): int array of shape (T, 3), face vertex indices

    Returns:
        np.ndarray: shape (..., M) interpolated result
    """
    sentinel_face = np.zeros((1, 3), dtype=faces.dtype)
    shifted_faces = np.concatenate([sentinel_face, faces + 1], axis=0)
    sentinel_attr = np.zeros((1, attr.shape[1]), dtype=attr.dtype)
    padded_attr = np.concatenate([sentinel_attr, attr], axis=0)

    # (..., 3, M): attributes of the three corners of each selected triangle.
    corner_attrs = padded_attr[shifted_faces[tri_id + 1]]
    weighted = bary[..., None] * corner_attrs
    return np.sum(weighted, axis=-2)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def image_scrcoord(
    width: int,
    height: int,
) -> np.ndarray:
    """
    Get OpenGL's screen space coordinates of the pixel centers, ranging in [0, 1].
    [0, 0] is the bottom-left corner of the image, so y decreases from the
    first row to the last.

    Args:
        width (int): image width
        height (int): image height

    Returns:
        (np.ndarray): shape (height, width, 2), dtype float32, last axis is (x, y)
    """
    half_px_x = 0.5 / width
    half_px_y = 0.5 / height
    xs = np.linspace(half_px_x, 1 - half_px_x, width, dtype=np.float32)
    # Top row first: y runs from just below 1 down to just above 0.
    ys = np.linspace(1 - half_px_y, half_px_y, height, dtype=np.float32)
    grid_x, grid_y = np.meshgrid(xs, ys, indexing='xy')
    return np.stack([grid_x, grid_y], axis=2)
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
def image_uv(
    height: int,
    width: int,
    left: int = None,
    top: int = None,
    right: int = None,
    bottom: int = None,
    dtype: np.dtype = np.float32
) -> np.ndarray:
    """
    Get image space UV grid of pixel centers, ranging in [0, 1].

    >>> image_uv(10, 10)
    [[[0.05, 0.05], [0.15, 0.05], ..., [0.95, 0.05]],
     [[0.05, 0.15], [0.15, 0.15], ..., [0.95, 0.15]],
      ...             ...                  ...
     [[0.05, 0.95], [0.15, 0.95], ..., [0.95, 0.95]]]

    Args:
        height (int): image height
        width (int): image width
        left (int, optional): first column of the crop. Defaults to 0.
        top (int, optional): first row of the crop. Defaults to 0.
        right (int, optional): one past the last column of the crop. Defaults to `width`.
        bottom (int, optional): one past the last row of the crop. Defaults to `height`.
        dtype (np.dtype): dtype of the result. Defaults to np.float32.

    Returns:
        np.ndarray: shape (bottom - top, right - left, 2), i.e. (height, width, 2)
            without a crop; last axis is (u, v)
    """
    left = 0 if left is None else left
    top = 0 if top is None else top
    right = width if right is None else right
    bottom = height if bottom is None else bottom

    n_cols = right - left
    n_rows = bottom - top
    u_axis = np.linspace((left + 0.5) / width, (right - 0.5) / width, n_cols, dtype=dtype)
    v_axis = np.linspace((top + 0.5) / height, (bottom - 0.5) / height, n_rows, dtype=dtype)
    grid_u, grid_v = np.meshgrid(u_axis, v_axis, indexing='xy')
    return np.stack([grid_u, grid_v], axis=2)
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
def image_pixel_center(
    height: int,
    width: int,
    left: int = None,
    top: int = None,
    right: int = None,
    bottom: int = None,
    dtype: np.dtype = np.float32
) -> np.ndarray:
    """
    Get image pixel center coordinates, ranging in [0, width] and [0, height].
    `image[i, j]` has pixel center coordinates `(j + 0.5, i + 0.5)`.

    >>> image_pixel_center(10, 10)
    [[[0.5, 0.5], [1.5, 0.5], ..., [9.5, 0.5]],
     [[0.5, 1.5], [1.5, 1.5], ..., [9.5, 1.5]],
      ...             ...                  ...
     [[0.5, 9.5], [1.5, 9.5], ..., [9.5, 9.5]]]

    Args:
        height (int): image height
        width (int): image width
        left (int, optional): first column of the crop. Defaults to 0.
        top (int, optional): first row of the crop. Defaults to 0.
        right (int, optional): one past the last column of the crop. Defaults to `width`.
        bottom (int, optional): one past the last row of the crop. Defaults to `height`.
        dtype (np.dtype): dtype of the result. Defaults to np.float32.

    Returns:
        np.ndarray: shape (bottom - top, right - left, 2), i.e. (height, width, 2)
            without a crop; last axis is (x, y)
    """
    left = 0 if left is None else left
    top = 0 if top is None else top
    right = width if right is None else right
    bottom = height if bottom is None else bottom

    x_centers = np.linspace(left + 0.5, right - 0.5, right - left, dtype=dtype)
    y_centers = np.linspace(top + 0.5, bottom - 0.5, bottom - top, dtype=dtype)
    grid_x, grid_y = np.meshgrid(x_centers, y_centers, indexing='xy')
    return np.stack([grid_x, grid_y], axis=2)
|
| 389 |
+
|
| 390 |
+
def image_pixel(
    height: int,
    width: int,
    left: int = None,
    top: int = None,
    right: int = None,
    bottom: int = None,
    dtype: np.dtype = np.int32
) -> np.ndarray:
    """
    Get image pixel coordinates grid, ranging in [0, width - 1] and [0, height - 1].
    `image[i, j]` has pixel coordinates `(j, i)`.

    >>> image_pixel(10, 10)
    [[[0, 0], [1, 0], ..., [9, 0]],
     [[0, 1], [1, 1], ..., [9, 1]],
      ...        ...          ...
     [[0, 9], [1, 9], ..., [9, 9]]]

    Args:
        height (int): image height
        width (int): image width
        left (int, optional): first column of the crop. Defaults to 0.
        top (int, optional): first row of the crop. Defaults to 0.
        right (int, optional): one past the last column of the crop. Defaults to `width`.
        bottom (int, optional): one past the last row of the crop. Defaults to `height`.
        dtype (np.dtype): dtype of the result. Defaults to np.int32.

    Returns:
        np.ndarray: shape (bottom - top, right - left, 2), i.e. (height, width, 2)
            without a crop; last axis is (x, y)
    """
    left = 0 if left is None else left
    top = 0 if top is None else top
    right = width if right is None else right
    bottom = height if bottom is None else bottom

    cols = np.arange(left, right, dtype=dtype)
    rows = np.arange(top, bottom, dtype=dtype)
    grid_x, grid_y = np.meshgrid(cols, rows, indexing='xy')
    return np.stack([grid_x, grid_y], axis=2)
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def image_mesh(
    *image_attrs: np.ndarray,
    mask: np.ndarray = None,
    tri: bool = False,
    return_indices: bool = False
) -> Tuple[np.ndarray, ...]:
    """
    Get a mesh regarding image pixel uv coordinates as vertices and image grid as faces.

    Vertex `i * width + j` corresponds to pixel `(i, j)`. Each quad lists its
    corners as top-left, bottom-left, bottom-right, top-right.

    Args:
        *image_attrs (np.ndarray): image attributes in shape (height, width, [channels])
        mask (np.ndarray, optional): binary mask of shape (height, width), dtype=bool. Defaults to None.
            When given, only quads whose four corners are all True are kept and the
            result is passed through `mesh.remove_unreferenced_vertices`.
        tri (bool, optional): triangulate the quads with `mesh.triangulate`. Defaults to False.
        return_indices (bool, optional): also return the indices of the kept vertices
            in the full (height * width) grid. Defaults to False.

    Returns:
        faces (np.ndarray): faces connecting neighboring pixels. shape (T, 4) if tri is False, else (T, 3)
        *vertex_attrs (np.ndarray): vertex attributes in corresponding order with input image_attrs
        indices (np.ndarray, optional): indices of vertices in the original mesh
    """
    assert (len(image_attrs) > 0) or (mask is not None), "At least one of image_attrs or mask should be provided"
    # `image_attrs` is a tuple, not an iterator: the original `next(image_attrs)`
    # raised TypeError whenever no mask was supplied.
    height, width = image_attrs[0].shape[:2] if mask is None else mask.shape
    assert all(img.shape[:2] == (height, width) for img in image_attrs), "All image_attrs should have the same shape"

    # Quads of the first pixel row, then shifted down by one row of vertices at a time.
    row_faces = np.stack([np.arange(0, width - 1, dtype=np.int32), np.arange(width, 2 * width - 1, dtype=np.int32), np.arange(1 + width, 2 * width, dtype=np.int32), np.arange(1, width, dtype=np.int32)], axis=1)
    faces = (np.arange(0, (height - 1) * width, width, dtype=np.int32)[:, None, None] + row_faces[None, :, :]).reshape((-1, 4))
    if mask is None:
        if tri:
            faces = mesh.triangulate(faces)
        ret = [faces, *(img.reshape(-1, *img.shape[2:]) for img in image_attrs)]
        if return_indices:
            ret.append(np.arange(height * width, dtype=np.int32))
        return tuple(ret)
    else:
        # Keep a quad only if all four of its corner pixels are valid.
        quad_mask = (mask[:-1, :-1] & mask[1:, :-1] & mask[1:, 1:] & mask[:-1, 1:]).ravel()
        faces = faces[quad_mask]
        if tri:
            faces = mesh.triangulate(faces)
        return mesh.remove_unreferenced_vertices(
            faces,
            *(x.reshape(-1, *x.shape[2:]) for x in image_attrs),
            return_indices=return_indices
        )
|
| 467 |
+
|
| 468 |
+
def image_mesh_from_depth(
    depth: np.ndarray,
    extrinsics: np.ndarray = None,
    intrinsics: np.ndarray = None,
    *vertice_attrs: np.ndarray,
    atol: float = None,
    rtol: float = None,
    remove_by_depth: bool = False,
    return_uv: bool = False,
    return_indices: bool = False
) -> Tuple[np.ndarray, ...]:
    """
    Get a triangle mesh by lifting a depth map to 3D.

    Args:
        depth (np.ndarray): [H, W] depth map
        extrinsics (np.ndarray, optional): [4, 4] extrinsics matrix. Defaults to None.
        intrinsics (np.ndarray, optional): [3, 3] intrinsics matrix. Defaults to None.
        *vertice_attrs (np.ndarray): [H, W, C] vertex attributes; each one is
            flattened to [H * W, C] before being returned.
        atol (float, optional): absolute tolerance. Defaults to None.
        rtol (float, optional): relative tolerance. Defaults to None.
            Triangles whose largest depth difference between two of their
            vertices exceeds ``atol + rtol * mean_triangle_depth`` are marked.
            If only one of the two tolerances is given, the other is treated as 0.
        remove_by_depth (bool, optional): whether to actually drop the marked
            triangles (and the vertices left unreferenced). Only has an effect
            when ``atol`` or ``rtol`` is given. Defaults to False.
        return_uv (bool, optional): whether to return uv coordinates. Defaults to False.
        return_indices (bool, optional): whether to return indices of vertices in the
            original mesh. Only honoured when ``atol`` or ``rtol`` is given.
            Defaults to False.

    Returns:
        vertices (np.ndarray): [N, 3] vertices
        faces (np.ndarray): [T, 3] faces
        *vertice_attrs (np.ndarray): [N, C] vertex attributes
        image_uv (np.ndarray, optional): [N, 2] uv coordinates
        ref_indices (np.ndarray, optional): [N] indices of vertices in the original mesh
    """
    height, width = depth.shape
    # NOTE(review): the image_mesh documented in this file takes *image_attrs
    # arrays and returns faces first; this call passes two ints and unpacks
    # (uv, faces). It looks like it targets a different image_mesh signature
    # and would fail against the one shown here -- confirm before relying on it.
    image_uv, image_face = image_mesh(height, width)
    depth = depth.reshape(-1)
    pts = transforms.unproject_cv(image_uv, depth, extrinsics, intrinsics)
    # Passing the lifted points lets triangulate() pick the split diagonal per quad.
    image_face = mesh.triangulate(image_face, vertices=pts)
    ref_indices = None
    ret = []
    if atol is not None or rtol is not None:
        atol = 0 if atol is None else atol
        rtol = 0 if rtol is None else rtol
        # Per-triangle mean depth and largest depth gap along its three edges.
        mean = depth[image_face].mean(axis=1)
        diff = np.max(np.abs(depth[image_face] - depth[image_face[:, [1, 2, 0]]]), axis=1)
        mask = (diff <= atol + rtol * mean)
        image_face_ = image_face[mask]
        image_face_, ref_indices = mesh.remove_unreferenced_vertices(image_face_, return_indices=True)

    # The filtered mesh is only swapped in when the caller asked for removal.
    remove = remove_by_depth and ref_indices is not None
    if remove:
        pts = pts[ref_indices]
        image_face = image_face_
    ret += [pts, image_face]
    for attr in vertice_attrs:
        ret.append(attr.reshape(-1, attr.shape[-1]) if not remove else attr.reshape(-1, attr.shape[-1])[ref_indices])
    if return_uv:
        ret.append(image_uv if not remove else image_uv[ref_indices])
    if return_indices and ref_indices is not None:
        ret.append(ref_indices)
    return tuple(ret)
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
def chessboard(width: int, height: int, grid_size: int, color_a: np.ndarray, color_b: np.ndarray) -> np.ndarray:
    """Render a two-colour chessboard image.

    Args:
        width (int): image width in pixels
        height (int): image height in pixels
        grid_size (int): side length of one chessboard cell in pixels
        color_a (np.ndarray): colour of the cell at the top-left corner
        color_b (np.ndarray): colour of the complementary cells

    Returns:
        image (np.ndarray): shape (height, width, channels), chessboard image
    """
    # Cell index of every pixel column / row.
    col_cells = np.arange(width) // grid_size
    row_cells = np.arange(height) // grid_size

    # 0 where the cell shares the top-left parity, 1 otherwise.
    parity = (row_cells.reshape(-1, 1) + col_cells.reshape(1, -1)) % 2
    weight_b = parity[..., None]
    weight_a = 1 - weight_b
    return weight_a * color_a + weight_b * color_b
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
def square(tri: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """
    Get a square mesh of area 1 centered at origin in the xy-plane.

    ### Parameters
        tri (bool, optional): return two triangles instead of one quad. Defaults to False.

    ### Returns
        vertices (np.ndarray): shape (4, 3), float32
        faces (np.ndarray): shape (1, 4) for the quad, or (2, 3) when tri is True, int32
    """
    corners = np.array(
        [
            [-0.5, 0.5, 0],   # v0
            [0.5, 0.5, 0],    # v1
            [0.5, -0.5, 0],   # v2
            [-0.5, -0.5, 0],  # v3
        ],
        dtype=np.float32,
    )
    # The triangulated form splits the quad along the v0-v2 diagonal.
    index_rows = [[0, 1, 2], [0, 2, 3]] if tri else [[0, 1, 2, 3]]
    return corners, np.array(index_rows, dtype=np.int32)
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
def cube(tri: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """
    Get a cube mesh of size 1 centered at origin.

    ### Parameters
        tri (bool, optional): return triangulated mesh. Defaults to False, which returns quad mesh.

    ### Returns
        vertices (np.ndarray): shape (8, 3), float32
        faces (np.ndarray): shape (6, 4) quad faces, or the result of
            triangulating them when tri is True
    """
    # Corner ring shared by the z = +0.5 face (v0-v3) and the z = -0.5 face (v4-v7).
    ring = [(-0.5, 0.5), (0.5, 0.5), (0.5, -0.5), (-0.5, -0.5)]
    vertices = np.array(
        [(x, y, z) for z in (0.5, -0.5) for (x, y) in ring],
        dtype=np.float32,
    )

    quad_faces = {
        "front": [0, 1, 2, 3],
        "top": [4, 5, 1, 0],
        "bottom": [3, 2, 6, 7],
        "back": [5, 4, 7, 6],
        "right": [1, 5, 6, 2],
        "left": [4, 0, 3, 7],
    }
    faces = np.array(list(quad_faces.values()), dtype=np.int32)

    if not tri:
        return vertices, faces
    return vertices, mesh.triangulate(faces, vertices=vertices)
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
def camera_frustum(extrinsics: np.ndarray, intrinsics: np.ndarray, depth: float = 1.0) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Get a triangle mesh of a camera frustum.

    Five points are unprojected with ``transforms.unproject_cv``: the apex
    (uv (0, 0) at depth 0) followed by the four image corners at ``depth``.

    Args:
        extrinsics (np.ndarray): [4, 4] extrinsics matrix
        intrinsics (np.ndarray): [3, 3] intrinsics matrix
        depth (float, optional): depth at which the four corners are placed. Defaults to 1.0.

    Returns:
        vertices (np.ndarray): unprojected points cast to float32 (apex first, then corners)
        edges (np.ndarray): [8, 2] int32 vertex index pairs (4 apex-to-corner, 4 around the rim)
        faces (np.ndarray): [6, 3] int32 triangles (4 sides, 2 for the far plane)
    """
    assert extrinsics.shape == (4, 4) and intrinsics.shape == (3, 3)

    # Row 0 is the apex; rows 1-4 walk around the image corners.
    corner_uv = np.array([[0, 0], [0, 0], [0, 1], [1, 1], [1, 0]], dtype=np.float32)
    corner_depth = np.array([0] + [depth] * 4, dtype=np.float32)
    unprojected = transforms.unproject_cv(corner_uv, corner_depth, extrinsics, intrinsics)
    vertices = unprojected.astype(np.float32)

    spokes = [[0, 1], [0, 2], [0, 3], [0, 4]]
    rim = [[1, 2], [2, 3], [3, 4], [4, 1]]
    edges = np.array(spokes + rim, dtype=np.int32)

    sides = [[0, 1, 2], [0, 2, 3], [0, 3, 4], [0, 4, 1]]
    far_plane = [[1, 2, 3], [1, 3, 4]]
    faces = np.array(sides + far_plane, dtype=np.int32)

    return vertices, edges, faces
|
| 624 |
+
|
| 625 |
+
|
| 626 |
+
def icosahedron():
    """
    Get an icosahedron mesh centered at origin.

    The vertex coordinates are built from 0, +-1 and +-golden ratio; they are
    not normalized to unit length.

    Returns:
        vertices (np.ndarray): shape (12, 3), float32
        faces (np.ndarray): shape (20, 3), int32 triangles
    """
    phi = (1 + 5 ** 0.5) / 2  # golden ratio

    # Three mutually orthogonal golden rectangles, four corners each.
    rect_yz = [[0, 1, phi], [0, -1, phi], [0, 1, -phi], [0, -1, -phi]]
    rect_xy = [[1, phi, 0], [-1, phi, 0], [1, -phi, 0], [-1, -phi, 0]]
    rect_zx = [[phi, 0, 1], [phi, 0, -1], [-phi, 0, 1], [-phi, 0, -1]]
    vertices = np.array(rect_yz + rect_xy + rect_zx, dtype=np.float32)

    fan_around_v0 = [[0, 1, 8], [0, 8, 4], [0, 4, 5], [0, 5, 10], [0, 10, 1]]
    fan_around_v3 = [[3, 2, 9], [3, 9, 6], [3, 6, 7], [3, 7, 11], [3, 11, 2]]
    band_a = [[1, 6, 8], [8, 9, 4], [4, 2, 5], [5, 11, 10], [10, 7, 1]]
    band_b = [[2, 4, 9], [9, 8, 6], [6, 1, 7], [7, 10, 11], [11, 5, 2]]
    faces = np.array(fan_around_v0 + fan_around_v3 + band_a + band_b, dtype=np.int32)

    return vertices, faces
|
mapanything/utils/hf_utils/visual_util.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
|
| 7 |
import copy
|
| 8 |
import os
|
|
|
|
| 9 |
|
| 10 |
import cv2
|
| 11 |
import matplotlib
|
|
@@ -15,6 +16,139 @@ import trimesh
|
|
| 15 |
from scipy.spatial.transform import Rotation
|
| 16 |
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def predictions_to_glb(
|
| 19 |
predictions,
|
| 20 |
conf_thres=50.0,
|
|
@@ -26,6 +160,7 @@ def predictions_to_glb(
|
|
| 26 |
target_dir=None,
|
| 27 |
prediction_mode="Predicted Pointmap",
|
| 28 |
mask_ambiguous=False,
|
|
|
|
| 29 |
) -> trimesh.Scene:
|
| 30 |
"""
|
| 31 |
Converts VGGT predictions to a 3D scene represented as a GLB file.
|
|
@@ -44,9 +179,11 @@ def predictions_to_glb(
|
|
| 44 |
mask_sky (bool): Apply sky segmentation mask (default: False)
|
| 45 |
target_dir (str): Output directory for intermediate files (default: None)
|
| 46 |
prediction_mode (str): Prediction mode selector (default: "Predicted Pointmap")
|
|
|
|
|
|
|
| 47 |
|
| 48 |
Returns:
|
| 49 |
-
trimesh.Scene: Processed 3D scene containing point cloud and cameras
|
| 50 |
|
| 51 |
Raises:
|
| 52 |
ValueError: If input predictions structure is invalid
|
|
@@ -215,9 +352,135 @@ def predictions_to_glb(
|
|
| 215 |
scene_3d = trimesh.Scene()
|
| 216 |
|
| 217 |
# Add point cloud data to the scene
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
# Prepare 4x4 matrices for camera extrinsics
|
| 223 |
num_cameras = len(camera_matrices)
|
|
@@ -259,9 +522,11 @@ def integrate_camera_into_scene(
|
|
| 259 |
face_colors (tuple): Color of the camera face.
|
| 260 |
scene_scale (float): Scale of the scene.
|
| 261 |
"""
|
| 262 |
-
|
| 263 |
cam_width = scene_scale * 0.05
|
| 264 |
cam_height = scene_scale * 0.1
|
|
|
|
|
|
|
| 265 |
|
| 266 |
# Create cone shape for camera
|
| 267 |
rot_45_degree = np.eye(4)
|
|
|
|
| 6 |
|
| 7 |
import copy
|
| 8 |
import os
|
| 9 |
+
from typing import Tuple
|
| 10 |
|
| 11 |
import cv2
|
| 12 |
import matplotlib
|
|
|
|
| 16 |
from scipy.spatial.transform import Rotation
|
| 17 |
|
| 18 |
|
| 19 |
+
def remove_unreferenced_vertices(
    faces: np.ndarray, *vertice_attrs, return_indices: bool = False
) -> Tuple[np.ndarray, ...]:
    """
    Drop the vertices that no face refers to and renumber the faces.

    Args:
        faces (np.ndarray): [T, P] face indices
        *vertice_attrs: per-vertex attribute arrays, indexed by vertex id along axis 0

    Returns:
        faces (np.ndarray): [T, P] int32 face indices into the compacted vertex list
        *vertice_attrs: the attribute arrays restricted to the kept vertices
        indices (np.ndarray, optional): [N] sorted ids of the vertices that were kept;
            only returned when return_indices is True
    """
    corners_per_face = faces.shape[-1]

    # `kept` holds the sorted unique vertex ids; `remapped` gives, for every
    # entry of `faces`, its position inside `kept`.
    kept, remapped = np.unique(faces, return_inverse=True)
    compact_faces = remapped.astype(np.int32).reshape(-1, corners_per_face)

    gathered = [attr[kept] for attr in vertice_attrs]
    trailing = [kept] if return_indices else []
    return (compact_faces, *gathered, *trailing)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def triangulate(
    faces: np.ndarray, vertices: np.ndarray = None, backslash: np.ndarray = None
) -> np.ndarray:
    """
    Split polygonal faces into triangles.

    Args:
        faces (np.ndarray): [L, P] polygonal faces
        vertices (np.ndarray, optional): [N, 3] 3-dimensional vertices.
            If given (quads only), each quad is split along the shorter of its
            two diagonals unless backslash is supplied. Defaults to None.
        backslash (np.ndarray, optional): [L] boolean array; True splits a quad
            along the 0-2 diagonal, False along the 1-3 diagonal. Defaults to None.

    Returns:
        (np.ndarray): [L * (P - 2), 3] triangular faces. Triangle input is
            returned unchanged.
    """
    corner_count = faces.shape[-1]
    if corner_count == 3:
        return faces

    if vertices is not None:
        assert corner_count == 4, "now only support quad mesh"
        if backslash is None:
            diag_02 = np.linalg.norm(
                vertices[faces[:, 0]] - vertices[faces[:, 2]], axis=-1
            )
            diag_13 = np.linalg.norm(
                vertices[faces[:, 1]] - vertices[faces[:, 3]], axis=-1
            )
            backslash = diag_02 < diag_13

    if backslash is not None:
        assert corner_count == 4, "now only support quad mesh"
        split_along_02 = faces[:, [0, 1, 2, 0, 2, 3]]
        split_along_13 = faces[:, [0, 1, 3, 3, 1, 2]]
        chosen = np.where(backslash[:, None], split_along_02, split_along_13)
        return chosen.reshape((-1, 3))

    # No per-quad choice available: fan every polygon out from its first corner.
    second = np.arange(1, corner_count - 1, 1, dtype=int)
    third = np.arange(2, corner_count, 1, dtype=int)
    first = np.zeros(corner_count - 2, dtype=int)
    fan = np.stack([first, second, third], axis=1)
    return faces[:, fan].reshape((-1, 3))
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def image_mesh(
    *image_attrs: np.ndarray,
    mask: np.ndarray = None,
    tri: bool = False,
    return_indices: bool = False,
) -> Tuple[np.ndarray, ...]:
    """
    Get a mesh regarding image pixel uv coordinates as vertices and image grid as faces.

    Args:
        *image_attrs (np.ndarray): image attributes in shape (height, width, [channels])
        mask (np.ndarray, optional): binary mask of shape (height, width). A quad is
            kept only if all four of its corner pixels are set. Non-bool masks are
            converted to bool. Defaults to None (keep every quad).
        tri (bool, optional): triangulate the quad faces. Defaults to False.
        return_indices (bool, optional): also return, for every output vertex, the
            flat index of the pixel it came from. Defaults to False.

    Returns:
        faces (np.ndarray): faces connecting neighboring pixels. shape (T, 4) if tri is False, else (T, 3)
        *vertex_attrs (np.ndarray): vertex attributes in corresponding order with input image_attrs
        indices (np.ndarray, optional): indices of vertices in the original mesh
    """
    assert (len(image_attrs) > 0) or (mask is not None), (
        "At least one of image_attrs or mask should be provided"
    )
    if mask is not None:
        # `&` and boolean indexing below require a real bool array.
        mask = np.asarray(mask, dtype=bool)

    # image_attrs is a tuple (collected from *args), not an iterator, so it must
    # be indexed; calling next() on it raises TypeError.
    height, width = image_attrs[0].shape[:2] if mask is None else mask.shape
    assert all(img.shape[:2] == (height, width) for img in image_attrs), (
        "All image_attrs should have the same shape"
    )

    # Quads of the first pixel row: top-left, bottom-left, bottom-right, top-right.
    row_faces = np.stack(
        [
            np.arange(0, width - 1, dtype=np.int32),
            np.arange(width, 2 * width - 1, dtype=np.int32),
            np.arange(1 + width, 2 * width, dtype=np.int32),
            np.arange(1, width, dtype=np.int32),
        ],
        axis=1,
    )
    # Shift that row pattern down by one image row at a time.
    faces = (
        np.arange(0, (height - 1) * width, width, dtype=np.int32)[:, None, None]
        + row_faces[None, :, :]
    ).reshape((-1, 4))

    if mask is None:
        if tri:
            faces = triangulate(faces)
        ret = [faces, *(img.reshape(-1, *img.shape[2:]) for img in image_attrs)]
        if return_indices:
            ret.append(np.arange(height * width, dtype=np.int32))
        return tuple(ret)

    # Keep only the quads whose four corners are all inside the mask.
    quad_mask = (
        mask[:-1, :-1] & mask[1:, :-1] & mask[1:, 1:] & mask[:-1, 1:]
    ).ravel()
    faces = faces[quad_mask]
    if tri:
        faces = triangulate(faces)
    return remove_unreferenced_vertices(
        faces,
        *(x.reshape(-1, *x.shape[2:]) for x in image_attrs),
        return_indices=return_indices,
    )
|
| 150 |
+
|
| 151 |
+
|
| 152 |
def predictions_to_glb(
|
| 153 |
predictions,
|
| 154 |
conf_thres=50.0,
|
|
|
|
| 160 |
target_dir=None,
|
| 161 |
prediction_mode="Predicted Pointmap",
|
| 162 |
mask_ambiguous=False,
|
| 163 |
+
as_mesh=True,
|
| 164 |
) -> trimesh.Scene:
|
| 165 |
"""
|
| 166 |
Converts VGGT predictions to a 3D scene represented as a GLB file.
|
|
|
|
| 179 |
mask_sky (bool): Apply sky segmentation mask (default: False)
|
| 180 |
target_dir (str): Output directory for intermediate files (default: None)
|
| 181 |
prediction_mode (str): Prediction mode selector (default: "Predicted Pointmap")
|
| 182 |
+
mask_ambiguous (bool): Apply final mask to filter ambiguous predictions (default: False)
|
| 183 |
+
as_mesh (bool): Represent the data as a mesh instead of point cloud (default: True)
|
| 184 |
|
| 185 |
Returns:
|
| 186 |
+
trimesh.Scene: Processed 3D scene containing point cloud/mesh and cameras
|
| 187 |
|
| 188 |
Raises:
|
| 189 |
ValueError: If input predictions structure is invalid
|
|
|
|
| 352 |
scene_3d = trimesh.Scene()
|
| 353 |
|
| 354 |
# Add point cloud data to the scene
|
| 355 |
+
if as_mesh:
|
| 356 |
+
# Create mesh from pointcloud
|
| 357 |
+
# try:
|
| 358 |
+
if selected_frame_idx is not None:
|
| 359 |
+
# Single frame case - we can create a proper mesh
|
| 360 |
+
H, W = pred_world_points.shape[1:3]
|
| 361 |
+
|
| 362 |
+
# Get original unfiltered data for mesh creation
|
| 363 |
+
original_points = pred_world_points.reshape(H, W, 3)
|
| 364 |
+
|
| 365 |
+
# Reshape original image data properly
|
| 366 |
+
if images.ndim == 4 and images.shape[1] == 3: # NCHW format
|
| 367 |
+
original_image_colors = np.transpose(images[0], (1, 2, 0))
|
| 368 |
+
else: # Assume already in HWC format
|
| 369 |
+
original_image_colors = images[0]
|
| 370 |
+
original_image_colors *= 255
|
| 371 |
+
# Create mask from confidence and other filters
|
| 372 |
+
original_conf = pred_world_points_conf.reshape(H, W)
|
| 373 |
+
original_final_mask = predictions["final_mask"][selected_frame_idx].reshape(
|
| 374 |
+
H, W
|
| 375 |
+
)
|
| 376 |
+
|
| 377 |
+
# Apply thresholds to create mask
|
| 378 |
+
mask = (original_conf >= conf_threshold) & (original_conf > 1e-5)
|
| 379 |
+
if mask_ambiguous:
|
| 380 |
+
mask = mask & original_final_mask
|
| 381 |
+
|
| 382 |
+
# Additional background masks if needed
|
| 383 |
+
if mask_black_bg:
|
| 384 |
+
black_bg_mask = original_image_colors.sum(axis=2) >= 16
|
| 385 |
+
mask = mask & black_bg_mask
|
| 386 |
+
|
| 387 |
+
if mask_white_bg:
|
| 388 |
+
white_bg_mask = ~(
|
| 389 |
+
(original_image_colors[:, :, 0] > 240)
|
| 390 |
+
& (original_image_colors[:, :, 1] > 240)
|
| 391 |
+
& (original_image_colors[:, :, 2] > 240)
|
| 392 |
+
)
|
| 393 |
+
mask = mask & white_bg_mask
|
| 394 |
+
|
| 395 |
+
# Check if normals are available in predictions
|
| 396 |
+
vertex_normals = None
|
| 397 |
+
if "normal" in predictions and predictions["normal"] is not None:
|
| 398 |
+
# Get normals for the selected frame
|
| 399 |
+
frame_normals = (
|
| 400 |
+
predictions["normal"][selected_frame_idx]
|
| 401 |
+
if selected_frame_idx is not None
|
| 402 |
+
else predictions["normal"][0]
|
| 403 |
+
)
|
| 404 |
+
|
| 405 |
+
# Create faces and vertices using image_mesh with normals support
|
| 406 |
+
faces, vertices, vertex_colors, vertex_normals = image_mesh(
|
| 407 |
+
original_points * np.array([1, -1, 1], dtype=np.float32),
|
| 408 |
+
original_image_colors / 255.0,
|
| 409 |
+
frame_normals * np.array([1, -1, 1], dtype=np.float32),
|
| 410 |
+
mask=original_final_mask,
|
| 411 |
+
tri=True,
|
| 412 |
+
return_indices=False,
|
| 413 |
+
)
|
| 414 |
+
|
| 415 |
+
# Apply coordinate transformations to normals
|
| 416 |
+
vertex_normals = vertex_normals * np.array([1, -1, 1], dtype=np.float32)
|
| 417 |
+
# frame_normals = frame_normals * np.array([1, -1, 1], dtype=np.float32)
|
| 418 |
+
else:
|
| 419 |
+
# Create faces and vertices using image_mesh without normals
|
| 420 |
+
faces, vertices, vertex_colors = image_mesh(
|
| 421 |
+
original_points * np.array([1, -1, 1], dtype=np.float32),
|
| 422 |
+
original_image_colors / 255.0,
|
| 423 |
+
mask=original_final_mask,
|
| 424 |
+
tri=True,
|
| 425 |
+
return_indices=False,
|
| 426 |
+
)
|
| 427 |
+
|
| 428 |
+
vertices = vertices * np.array([1, -1, 1], dtype=np.float32)
|
| 429 |
+
|
| 430 |
+
# Create trimesh object with optional normals
|
| 431 |
+
mesh_data = trimesh.Trimesh(
|
| 432 |
+
vertices=vertices,
|
| 433 |
+
faces=faces,
|
| 434 |
+
vertex_colors=(vertex_colors * 255).astype(np.uint8),
|
| 435 |
+
vertex_normals=(vertex_normals if vertex_normals is not None else None),
|
| 436 |
+
process=False,
|
| 437 |
+
)
|
| 438 |
+
scene_3d.add_geometry(mesh_data)
|
| 439 |
|
| 440 |
+
else:
|
| 441 |
+
# Multi-frame case - create separate meshes for each frame
|
| 442 |
+
print("Creating mesh for multi-frame data...")
|
| 443 |
+
|
| 444 |
+
for frame_idx in range(pred_world_points.shape[0]):
|
| 445 |
+
H, W = pred_world_points.shape[1:3]
|
| 446 |
+
|
| 447 |
+
# Get data for this frame
|
| 448 |
+
frame_points = pred_world_points[frame_idx]
|
| 449 |
+
frame_conf = pred_world_points_conf[frame_idx]
|
| 450 |
+
frame_final_mask = predictions["final_mask"][frame_idx]
|
| 451 |
+
|
| 452 |
+
# Get frame image
|
| 453 |
+
if images.ndim == 4 and images.shape[1] == 3: # NCHW format
|
| 454 |
+
frame_image = np.transpose(images[frame_idx], (1, 2, 0))
|
| 455 |
+
else: # Assume already in HWC format
|
| 456 |
+
frame_image = images[frame_idx]
|
| 457 |
+
frame_image *= 255
|
| 458 |
+
# Create mask for this frame
|
| 459 |
+
mask = (frame_conf >= conf_threshold) & (frame_conf > 1e-5)
|
| 460 |
+
if mask_ambiguous:
|
| 461 |
+
mask = mask | frame_final_mask
|
| 462 |
+
|
| 463 |
+
# Create mesh for this frame
|
| 464 |
+
faces, vertices, vertex_colors = image_mesh(
|
| 465 |
+
frame_points * np.array([1, -1, 1], dtype=np.float32),
|
| 466 |
+
frame_image / 255.0,
|
| 467 |
+
mask=frame_final_mask,
|
| 468 |
+
tri=True,
|
| 469 |
+
return_indices=False,
|
| 470 |
+
)
|
| 471 |
+
|
| 472 |
+
vertices = vertices * np.array([1, -1, 1], dtype=np.float32)
|
| 473 |
+
# Create trimesh object for this frame
|
| 474 |
+
frame_mesh = trimesh.Trimesh(
|
| 475 |
+
vertices=vertices,
|
| 476 |
+
faces=faces,
|
| 477 |
+
vertex_colors=(vertex_colors * 255).astype(np.uint8),
|
| 478 |
+
process=False,
|
| 479 |
+
)
|
| 480 |
+
scene_3d.add_geometry(frame_mesh)
|
| 481 |
+
else:
|
| 482 |
+
point_cloud_data = trimesh.PointCloud(vertices=vertices_3d, colors=colors_rgb)
|
| 483 |
+
scene_3d.add_geometry(point_cloud_data)
|
| 484 |
|
| 485 |
# Prepare 4x4 matrices for camera extrinsics
|
| 486 |
num_cameras = len(camera_matrices)
|
|
|
|
| 522 |
face_colors (tuple): Color of the camera face.
|
| 523 |
scene_scale (float): Scale of the scene.
|
| 524 |
"""
|
| 525 |
+
scene_scale = 12
|
| 526 |
cam_width = scene_scale * 0.05
|
| 527 |
cam_height = scene_scale * 0.1
|
| 528 |
+
# cam_width = scene_scale * 0.05
|
| 529 |
+
# cam_height = scene_scale * 0.1
|
| 530 |
|
| 531 |
# Create cone shape for camera
|
| 532 |
rot_45_degree = np.eye(4)
|