Spaces:
Running
on
Zero
Running
on
Zero
aknapitsch user
commited on
Commit
·
9f367f7
1
Parent(s):
9a006f8
finished mesh view
Browse files- app.py +165 -110
- mapanything/utils/hf_utils/visual_util.py +50 -137
app.py
CHANGED
|
@@ -48,26 +48,6 @@ def get_logo_base64():
|
|
| 48 |
return None
|
| 49 |
|
| 50 |
|
| 51 |
-
# MapAnything Configuration
|
| 52 |
-
# high_level_config = {
|
| 53 |
-
# "path": "configs/train.yaml",
|
| 54 |
-
# "hf_model_name": "facebook/MapAnything",
|
| 55 |
-
# "model_str": "mapanything",
|
| 56 |
-
# "config_overrides": [
|
| 57 |
-
# "machine=aws",
|
| 58 |
-
# "model=mapanything",
|
| 59 |
-
# "model/task=images_only",
|
| 60 |
-
# "model.encoder.uses_torch_hub=false",
|
| 61 |
-
# ],
|
| 62 |
-
# "checkpoint_name": "mapa_curri_24v_13d_48ipg_64g.pth",
|
| 63 |
-
# "config_name": "config.json",
|
| 64 |
-
# "trained_with_amp": True,
|
| 65 |
-
# "trained_with_amp_dtype": "fp16",
|
| 66 |
-
# "data_norm_type": "dinov2",
|
| 67 |
-
# "patch_size": 14,
|
| 68 |
-
# "resolution": 518,
|
| 69 |
-
# }
|
| 70 |
-
|
| 71 |
# MapAnything Configuration
|
| 72 |
high_level_config = {
|
| 73 |
"path": "configs/train.yaml",
|
|
@@ -96,7 +76,13 @@ model = None
|
|
| 96 |
# 1) Core model inference
|
| 97 |
# -------------------------------------------------------------------------
|
| 98 |
@spaces.GPU(duration=120)
|
| 99 |
-
def run_model(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
"""
|
| 101 |
Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
|
| 102 |
"""
|
|
@@ -204,7 +190,7 @@ def run_model(target_dir, apply_mask=True, mask_edges=True):
|
|
| 204 |
|
| 205 |
# Process data for visualization tabs (depth, normal, measure)
|
| 206 |
processed_data = process_predictions_for_visualization(
|
| 207 |
-
predictions, views, high_level_config
|
| 208 |
)
|
| 209 |
|
| 210 |
# Clean up
|
|
@@ -246,9 +232,7 @@ def update_depth_view(processed_data, view_index):
|
|
| 246 |
if view_data is None or view_data["depth"] is None:
|
| 247 |
return None
|
| 248 |
|
| 249 |
-
|
| 250 |
-
confidence = view_data.get("confidence")
|
| 251 |
-
return colorize_depth(view_data["depth"], confidence=confidence)
|
| 252 |
|
| 253 |
|
| 254 |
def update_normal_view(processed_data, view_index):
|
|
@@ -257,9 +241,7 @@ def update_normal_view(processed_data, view_index):
|
|
| 257 |
if view_data is None or view_data["normal"] is None:
|
| 258 |
return None
|
| 259 |
|
| 260 |
-
|
| 261 |
-
confidence = view_data.get("confidence")
|
| 262 |
-
return colorize_normal(view_data["normal"], confidence=confidence)
|
| 263 |
|
| 264 |
|
| 265 |
def update_measure_view(processed_data, view_index):
|
|
@@ -475,11 +457,11 @@ def gradio_demo(
|
|
| 475 |
target_dir,
|
| 476 |
frame_filter="All",
|
| 477 |
show_cam=True,
|
| 478 |
-
filter_sky=False,
|
| 479 |
filter_black_bg=False,
|
| 480 |
filter_white_bg=False,
|
| 481 |
apply_mask=True,
|
| 482 |
mask_edges=True,
|
|
|
|
| 483 |
):
|
| 484 |
"""
|
| 485 |
Perform reconstruction using the already-created target_dir/images.
|
|
@@ -491,9 +473,6 @@ def gradio_demo(
|
|
| 491 |
gc.collect()
|
| 492 |
torch.cuda.empty_cache()
|
| 493 |
|
| 494 |
-
# Always use Pointmap Branch for MapAnything
|
| 495 |
-
prediction_mode = "Pointmap Branch"
|
| 496 |
-
|
| 497 |
# Prepare frame_filter dropdown
|
| 498 |
target_dir_images = os.path.join(target_dir, "images")
|
| 499 |
all_files = (
|
|
@@ -519,7 +498,7 @@ def gradio_demo(
|
|
| 519 |
# Build a GLB file name
|
| 520 |
glbfile = os.path.join(
|
| 521 |
target_dir,
|
| 522 |
-
f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}
|
| 523 |
)
|
| 524 |
|
| 525 |
# Convert predictions to GLB
|
|
@@ -527,12 +506,9 @@ def gradio_demo(
|
|
| 527 |
predictions,
|
| 528 |
filter_by_frames=frame_filter,
|
| 529 |
show_cam=show_cam,
|
| 530 |
-
target_dir=target_dir,
|
| 531 |
-
prediction_mode=prediction_mode,
|
| 532 |
-
mask_sky=filter_sky,
|
| 533 |
mask_black_bg=filter_black_bg,
|
| 534 |
mask_white_bg=filter_white_bg,
|
| 535 |
-
as_mesh=
|
| 536 |
)
|
| 537 |
glbscene.export(file_obj=glbfile)
|
| 538 |
|
|
@@ -575,42 +551,19 @@ def gradio_demo(
|
|
| 575 |
# -------------------------------------------------------------------------
|
| 576 |
# 5) Helper functions for UI resets + re-visualization
|
| 577 |
# -------------------------------------------------------------------------
|
| 578 |
-
def
|
| 579 |
-
"""
|
| 580 |
-
if confidence is None or data is None:
|
| 581 |
-
return data
|
| 582 |
-
|
| 583 |
-
# Convert confidence threshold from percentage to confidence value
|
| 584 |
-
conf_threshold = np.percentile(confidence, conf_thres)
|
| 585 |
-
conf_mask = (confidence >= conf_threshold) & (confidence > 1e-5)
|
| 586 |
-
|
| 587 |
-
# conf_mask = confidence >= (conf_thres)
|
| 588 |
-
|
| 589 |
-
# Apply mask to data
|
| 590 |
-
if len(data.shape) == 3: # 3D data (H, W, C)
|
| 591 |
-
filtered_data = data.copy()
|
| 592 |
-
for c in range(data.shape[2]):
|
| 593 |
-
filtered_data[:, :, c] = np.where(conf_mask, data[:, :, c], 0)
|
| 594 |
-
elif len(data.shape) == 2: # 2D data (H, W)
|
| 595 |
-
filtered_data = np.where(conf_mask, data, 0)
|
| 596 |
-
else:
|
| 597 |
-
filtered_data = data
|
| 598 |
-
|
| 599 |
-
return filtered_data
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
def colorize_depth(depth_map, confidence=None, conf_thres=None):
|
| 603 |
-
"""Convert depth map to colorized visualization with optional confidence filtering"""
|
| 604 |
if depth_map is None:
|
| 605 |
return None
|
| 606 |
|
| 607 |
-
# Apply confidence filtering if available
|
| 608 |
-
if confidence is not None and conf_thres is not None:
|
| 609 |
-
depth_map = apply_confidence_filtering(depth_map, confidence, conf_thres)
|
| 610 |
-
|
| 611 |
# Normalize depth to 0-1 range
|
| 612 |
depth_normalized = depth_map.copy()
|
| 613 |
valid_mask = depth_normalized > 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
if valid_mask.sum() > 0:
|
| 615 |
valid_depths = depth_normalized[valid_mask]
|
| 616 |
p5 = np.percentile(valid_depths, 5)
|
|
@@ -622,8 +575,6 @@ def colorize_depth(depth_map, confidence=None, conf_thres=None):
|
|
| 622 |
import matplotlib.pyplot as plt
|
| 623 |
|
| 624 |
colormap = plt.cm.turbo_r
|
| 625 |
-
# colormap = plt.cm.plasma
|
| 626 |
-
# colormap = plt.cm.viridis
|
| 627 |
colored = colormap(depth_normalized)
|
| 628 |
colored = (colored[:, :, :3] * 255).astype(np.uint8)
|
| 629 |
|
|
@@ -633,34 +584,36 @@ def colorize_depth(depth_map, confidence=None, conf_thres=None):
|
|
| 633 |
return colored
|
| 634 |
|
| 635 |
|
| 636 |
-
def colorize_normal(normal_map,
|
| 637 |
-
"""Convert normal map to colorized visualization with optional
|
| 638 |
if normal_map is None:
|
| 639 |
return None
|
| 640 |
|
| 641 |
-
#
|
| 642 |
-
|
| 643 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
|
| 645 |
# Normalize normals to [0, 1] range for visualization
|
| 646 |
-
normal_vis = (
|
| 647 |
normal_vis = (normal_vis * 255).astype(np.uint8)
|
| 648 |
|
| 649 |
return normal_vis
|
| 650 |
|
| 651 |
|
| 652 |
-
def process_predictions_for_visualization(
|
|
|
|
|
|
|
| 653 |
"""Extract depth, normal, and 3D points from predictions for visualization"""
|
| 654 |
processed_data = {}
|
| 655 |
|
| 656 |
-
# Check if confidence data is available in any view
|
| 657 |
-
has_confidence_data = False
|
| 658 |
-
|
| 659 |
# Process each view
|
| 660 |
for view_idx, view in enumerate(views):
|
| 661 |
# Get image
|
| 662 |
image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
|
| 663 |
-
# image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
|
| 664 |
|
| 665 |
# Get predicted points
|
| 666 |
pred_pts3d = predictions["world_points"][view_idx]
|
|
@@ -672,12 +625,32 @@ def process_predictions_for_visualization(predictions, views, high_level_config)
|
|
| 672 |
"depth": None,
|
| 673 |
"normal": None,
|
| 674 |
"mask": None,
|
| 675 |
-
"confidence": None,
|
| 676 |
-
"has_confidence": has_confidence_data,
|
| 677 |
}
|
| 678 |
|
| 679 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
|
|
|
|
| 681 |
view_data["depth"] = predictions["depth"][view_idx].squeeze()
|
| 682 |
|
| 683 |
normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
|
|
@@ -872,7 +845,6 @@ def update_visualization(
|
|
| 872 |
frame_filter,
|
| 873 |
show_cam,
|
| 874 |
is_example,
|
| 875 |
-
filter_sky=False,
|
| 876 |
filter_black_bg=False,
|
| 877 |
filter_white_bg=False,
|
| 878 |
show_mesh=True,
|
|
@@ -905,12 +877,9 @@ def update_visualization(
|
|
| 905 |
loaded = np.load(predictions_path, allow_pickle=True)
|
| 906 |
predictions = {key: loaded[key] for key in loaded.keys()}
|
| 907 |
|
| 908 |
-
# Always use Pointmap Branch for MapAnything
|
| 909 |
-
prediction_mode = "Pointmap Branch"
|
| 910 |
-
|
| 911 |
glbfile = os.path.join(
|
| 912 |
target_dir,
|
| 913 |
-
f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}
|
| 914 |
)
|
| 915 |
|
| 916 |
if not os.path.exists(glbfile):
|
|
@@ -918,9 +887,6 @@ def update_visualization(
|
|
| 918 |
predictions,
|
| 919 |
filter_by_frames=frame_filter,
|
| 920 |
show_cam=show_cam,
|
| 921 |
-
target_dir=target_dir,
|
| 922 |
-
prediction_mode=prediction_mode,
|
| 923 |
-
mask_sky=filter_sky,
|
| 924 |
mask_black_bg=filter_black_bg,
|
| 925 |
mask_white_bg=filter_white_bg,
|
| 926 |
as_mesh=show_mesh,
|
|
@@ -933,6 +899,77 @@ def update_visualization(
|
|
| 933 |
)
|
| 934 |
|
| 935 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 936 |
# -------------------------------------------------------------------------
|
| 937 |
# Example scene functions
|
| 938 |
# -------------------------------------------------------------------------
|
|
@@ -1147,9 +1184,6 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1147 |
gr.Markdown("### Pointcloud options (live updates)")
|
| 1148 |
show_cam = gr.Checkbox(label="Show Camera", value=True)
|
| 1149 |
show_mesh = gr.Checkbox(label="Show mesh", value=True)
|
| 1150 |
-
filter_sky = gr.Checkbox(
|
| 1151 |
-
label="Filter Sky (using skyseg.onnx)", value=False
|
| 1152 |
-
)
|
| 1153 |
filter_black_bg = gr.Checkbox(
|
| 1154 |
label="Filter Black Background", value=False
|
| 1155 |
)
|
|
@@ -1160,7 +1194,6 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1160 |
apply_mask_checkbox = gr.Checkbox(
|
| 1161 |
label="Apply non-ambiguous mask", value=True
|
| 1162 |
)
|
| 1163 |
-
mask_edges_checkbox = apply_mask_checkbox
|
| 1164 |
# ---------------------- Example Scenes Section ----------------------
|
| 1165 |
gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
|
| 1166 |
gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
|
|
@@ -1223,11 +1256,10 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1223 |
target_dir_output,
|
| 1224 |
frame_filter,
|
| 1225 |
show_cam,
|
| 1226 |
-
filter_sky,
|
| 1227 |
filter_black_bg,
|
| 1228 |
filter_white_bg,
|
| 1229 |
apply_mask_checkbox,
|
| 1230 |
-
|
| 1231 |
],
|
| 1232 |
outputs=[
|
| 1233 |
reconstruction_output,
|
|
@@ -1258,6 +1290,9 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1258 |
frame_filter,
|
| 1259 |
show_cam,
|
| 1260 |
is_example,
|
|
|
|
|
|
|
|
|
|
| 1261 |
],
|
| 1262 |
[reconstruction_output, log_output],
|
| 1263 |
)
|
|
@@ -1271,31 +1306,35 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1271 |
],
|
| 1272 |
[reconstruction_output, log_output],
|
| 1273 |
)
|
| 1274 |
-
|
| 1275 |
update_visualization,
|
| 1276 |
[
|
| 1277 |
target_dir_output,
|
| 1278 |
frame_filter,
|
| 1279 |
show_cam,
|
| 1280 |
is_example,
|
| 1281 |
-
filter_sky,
|
| 1282 |
filter_black_bg,
|
| 1283 |
filter_white_bg,
|
| 1284 |
],
|
| 1285 |
[reconstruction_output, log_output],
|
| 1286 |
-
)
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
[
|
| 1290 |
target_dir_output,
|
| 1291 |
-
frame_filter,
|
| 1292 |
-
show_cam,
|
| 1293 |
-
is_example,
|
| 1294 |
-
filter_sky,
|
| 1295 |
filter_black_bg,
|
| 1296 |
filter_white_bg,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1297 |
],
|
| 1298 |
-
[reconstruction_output, log_output],
|
| 1299 |
)
|
| 1300 |
filter_white_bg.change(
|
| 1301 |
update_visualization,
|
|
@@ -1304,12 +1343,29 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1304 |
frame_filter,
|
| 1305 |
show_cam,
|
| 1306 |
is_example,
|
| 1307 |
-
filter_sky,
|
| 1308 |
filter_black_bg,
|
| 1309 |
filter_white_bg,
|
| 1310 |
show_mesh,
|
| 1311 |
],
|
| 1312 |
[reconstruction_output, log_output],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1313 |
)
|
| 1314 |
|
| 1315 |
show_mesh.change(
|
|
@@ -1319,7 +1375,6 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
|
|
| 1319 |
frame_filter,
|
| 1320 |
show_cam,
|
| 1321 |
is_example,
|
| 1322 |
-
filter_sky,
|
| 1323 |
filter_black_bg,
|
| 1324 |
filter_white_bg,
|
| 1325 |
show_mesh,
|
|
|
|
| 48 |
return None
|
| 49 |
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# MapAnything Configuration
|
| 52 |
high_level_config = {
|
| 53 |
"path": "configs/train.yaml",
|
|
|
|
| 76 |
# 1) Core model inference
|
| 77 |
# -------------------------------------------------------------------------
|
| 78 |
@spaces.GPU(duration=120)
|
| 79 |
+
def run_model(
|
| 80 |
+
target_dir,
|
| 81 |
+
apply_mask=True,
|
| 82 |
+
mask_edges=True,
|
| 83 |
+
filter_black_bg=False,
|
| 84 |
+
filter_white_bg=False,
|
| 85 |
+
):
|
| 86 |
"""
|
| 87 |
Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
|
| 88 |
"""
|
|
|
|
| 190 |
|
| 191 |
# Process data for visualization tabs (depth, normal, measure)
|
| 192 |
processed_data = process_predictions_for_visualization(
|
| 193 |
+
predictions, views, high_level_config, filter_black_bg, filter_white_bg
|
| 194 |
)
|
| 195 |
|
| 196 |
# Clean up
|
|
|
|
| 232 |
if view_data is None or view_data["depth"] is None:
|
| 233 |
return None
|
| 234 |
|
| 235 |
+
return colorize_depth(view_data["depth"], mask=view_data.get("mask"))
|
|
|
|
|
|
|
| 236 |
|
| 237 |
|
| 238 |
def update_normal_view(processed_data, view_index):
|
|
|
|
| 241 |
if view_data is None or view_data["normal"] is None:
|
| 242 |
return None
|
| 243 |
|
| 244 |
+
return colorize_normal(view_data["normal"], mask=view_data.get("mask"))
|
|
|
|
|
|
|
| 245 |
|
| 246 |
|
| 247 |
def update_measure_view(processed_data, view_index):
|
|
|
|
| 457 |
target_dir,
|
| 458 |
frame_filter="All",
|
| 459 |
show_cam=True,
|
|
|
|
| 460 |
filter_black_bg=False,
|
| 461 |
filter_white_bg=False,
|
| 462 |
apply_mask=True,
|
| 463 |
mask_edges=True,
|
| 464 |
+
show_mesh=True,
|
| 465 |
):
|
| 466 |
"""
|
| 467 |
Perform reconstruction using the already-created target_dir/images.
|
|
|
|
| 473 |
gc.collect()
|
| 474 |
torch.cuda.empty_cache()
|
| 475 |
|
|
|
|
|
|
|
|
|
|
| 476 |
# Prepare frame_filter dropdown
|
| 477 |
target_dir_images = os.path.join(target_dir, "images")
|
| 478 |
all_files = (
|
|
|
|
| 498 |
# Build a GLB file name
|
| 499 |
glbfile = os.path.join(
|
| 500 |
target_dir,
|
| 501 |
+
f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
|
| 502 |
)
|
| 503 |
|
| 504 |
# Convert predictions to GLB
|
|
|
|
| 506 |
predictions,
|
| 507 |
filter_by_frames=frame_filter,
|
| 508 |
show_cam=show_cam,
|
|
|
|
|
|
|
|
|
|
| 509 |
mask_black_bg=filter_black_bg,
|
| 510 |
mask_white_bg=filter_white_bg,
|
| 511 |
+
as_mesh=show_mesh, # Use the show_mesh parameter
|
| 512 |
)
|
| 513 |
glbscene.export(file_obj=glbfile)
|
| 514 |
|
|
|
|
| 551 |
# -------------------------------------------------------------------------
|
| 552 |
# 5) Helper functions for UI resets + re-visualization
|
| 553 |
# -------------------------------------------------------------------------
|
| 554 |
+
def colorize_depth(depth_map, mask=None):
|
| 555 |
+
"""Convert depth map to colorized visualization with optional mask"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
if depth_map is None:
|
| 557 |
return None
|
| 558 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
# Normalize depth to 0-1 range
|
| 560 |
depth_normalized = depth_map.copy()
|
| 561 |
valid_mask = depth_normalized > 0
|
| 562 |
+
|
| 563 |
+
# Apply additional mask if provided (for background filtering)
|
| 564 |
+
if mask is not None:
|
| 565 |
+
valid_mask = valid_mask & mask
|
| 566 |
+
|
| 567 |
if valid_mask.sum() > 0:
|
| 568 |
valid_depths = depth_normalized[valid_mask]
|
| 569 |
p5 = np.percentile(valid_depths, 5)
|
|
|
|
| 575 |
import matplotlib.pyplot as plt
|
| 576 |
|
| 577 |
colormap = plt.cm.turbo_r
|
|
|
|
|
|
|
| 578 |
colored = colormap(depth_normalized)
|
| 579 |
colored = (colored[:, :, :3] * 255).astype(np.uint8)
|
| 580 |
|
|
|
|
| 584 |
return colored
|
| 585 |
|
| 586 |
|
| 587 |
+
def colorize_normal(normal_map, mask=None):
|
| 588 |
+
"""Convert normal map to colorized visualization with optional mask"""
|
| 589 |
if normal_map is None:
|
| 590 |
return None
|
| 591 |
|
| 592 |
+
# Create a copy for modification
|
| 593 |
+
normal_vis = normal_map.copy()
|
| 594 |
+
|
| 595 |
+
# Apply mask if provided (set masked areas to [0, 0, 0] which becomes grey after normalization)
|
| 596 |
+
if mask is not None:
|
| 597 |
+
invalid_mask = ~mask
|
| 598 |
+
normal_vis[invalid_mask] = [0, 0, 0] # Set invalid areas to zero
|
| 599 |
|
| 600 |
# Normalize normals to [0, 1] range for visualization
|
| 601 |
+
normal_vis = (normal_vis + 1.0) / 2.0
|
| 602 |
normal_vis = (normal_vis * 255).astype(np.uint8)
|
| 603 |
|
| 604 |
return normal_vis
|
| 605 |
|
| 606 |
|
| 607 |
+
def process_predictions_for_visualization(
|
| 608 |
+
predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False
|
| 609 |
+
):
|
| 610 |
"""Extract depth, normal, and 3D points from predictions for visualization"""
|
| 611 |
processed_data = {}
|
| 612 |
|
|
|
|
|
|
|
|
|
|
| 613 |
# Process each view
|
| 614 |
for view_idx, view in enumerate(views):
|
| 615 |
# Get image
|
| 616 |
image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
|
|
|
|
| 617 |
|
| 618 |
# Get predicted points
|
| 619 |
pred_pts3d = predictions["world_points"][view_idx]
|
|
|
|
| 625 |
"depth": None,
|
| 626 |
"normal": None,
|
| 627 |
"mask": None,
|
|
|
|
|
|
|
| 628 |
}
|
| 629 |
|
| 630 |
+
# Start with the final mask from predictions
|
| 631 |
+
mask = predictions["final_mask"][view_idx].copy()
|
| 632 |
+
|
| 633 |
+
# Apply black background filtering if enabled
|
| 634 |
+
if filter_black_bg:
|
| 635 |
+
# Get the image colors (ensure they're in 0-255 range)
|
| 636 |
+
view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
|
| 637 |
+
# Filter out black background pixels (sum of RGB < 16)
|
| 638 |
+
black_bg_mask = view_colors.sum(axis=2) >= 16
|
| 639 |
+
mask = mask & black_bg_mask
|
| 640 |
+
|
| 641 |
+
# Apply white background filtering if enabled
|
| 642 |
+
if filter_white_bg:
|
| 643 |
+
# Get the image colors (ensure they're in 0-255 range)
|
| 644 |
+
view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
|
| 645 |
+
# Filter out white background pixels (all RGB > 240)
|
| 646 |
+
white_bg_mask = ~(
|
| 647 |
+
(view_colors[:, :, 0] > 240)
|
| 648 |
+
& (view_colors[:, :, 1] > 240)
|
| 649 |
+
& (view_colors[:, :, 2] > 240)
|
| 650 |
+
)
|
| 651 |
+
mask = mask & white_bg_mask
|
| 652 |
|
| 653 |
+
view_data["mask"] = mask
|
| 654 |
view_data["depth"] = predictions["depth"][view_idx].squeeze()
|
| 655 |
|
| 656 |
normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
|
|
|
|
| 845 |
frame_filter,
|
| 846 |
show_cam,
|
| 847 |
is_example,
|
|
|
|
| 848 |
filter_black_bg=False,
|
| 849 |
filter_white_bg=False,
|
| 850 |
show_mesh=True,
|
|
|
|
| 877 |
loaded = np.load(predictions_path, allow_pickle=True)
|
| 878 |
predictions = {key: loaded[key] for key in loaded.keys()}
|
| 879 |
|
|
|
|
|
|
|
|
|
|
| 880 |
glbfile = os.path.join(
|
| 881 |
target_dir,
|
| 882 |
+
f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
|
| 883 |
)
|
| 884 |
|
| 885 |
if not os.path.exists(glbfile):
|
|
|
|
| 887 |
predictions,
|
| 888 |
filter_by_frames=frame_filter,
|
| 889 |
show_cam=show_cam,
|
|
|
|
|
|
|
|
|
|
| 890 |
mask_black_bg=filter_black_bg,
|
| 891 |
mask_white_bg=filter_white_bg,
|
| 892 |
as_mesh=show_mesh,
|
|
|
|
| 899 |
)
|
| 900 |
|
| 901 |
|
| 902 |
+
def update_all_views_on_filter_change(
|
| 903 |
+
target_dir,
|
| 904 |
+
filter_black_bg,
|
| 905 |
+
filter_white_bg,
|
| 906 |
+
processed_data,
|
| 907 |
+
depth_view_selector,
|
| 908 |
+
normal_view_selector,
|
| 909 |
+
measure_view_selector,
|
| 910 |
+
):
|
| 911 |
+
"""
|
| 912 |
+
Update all individual view tabs when background filtering checkboxes change.
|
| 913 |
+
This regenerates the processed data with new filtering and updates all views.
|
| 914 |
+
"""
|
| 915 |
+
# Check if we have a valid target directory and predictions
|
| 916 |
+
if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
|
| 917 |
+
return processed_data, None, None, None, []
|
| 918 |
+
|
| 919 |
+
predictions_path = os.path.join(target_dir, "predictions.npz")
|
| 920 |
+
if not os.path.exists(predictions_path):
|
| 921 |
+
return processed_data, None, None, None, []
|
| 922 |
+
|
| 923 |
+
try:
|
| 924 |
+
# Load the original predictions and views
|
| 925 |
+
loaded = np.load(predictions_path, allow_pickle=True)
|
| 926 |
+
predictions = {key: loaded[key] for key in loaded.keys()}
|
| 927 |
+
|
| 928 |
+
# Load images using MapAnything's load_images function
|
| 929 |
+
image_folder_path = os.path.join(target_dir, "images")
|
| 930 |
+
views = load_images(image_folder_path)
|
| 931 |
+
|
| 932 |
+
# Regenerate processed data with new filtering settings
|
| 933 |
+
new_processed_data = process_predictions_for_visualization(
|
| 934 |
+
predictions, views, high_level_config, filter_black_bg, filter_white_bg
|
| 935 |
+
)
|
| 936 |
+
|
| 937 |
+
# Get current view indices
|
| 938 |
+
try:
|
| 939 |
+
depth_view_idx = (
|
| 940 |
+
int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
|
| 941 |
+
)
|
| 942 |
+
except:
|
| 943 |
+
depth_view_idx = 0
|
| 944 |
+
|
| 945 |
+
try:
|
| 946 |
+
normal_view_idx = (
|
| 947 |
+
int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
|
| 948 |
+
)
|
| 949 |
+
except:
|
| 950 |
+
normal_view_idx = 0
|
| 951 |
+
|
| 952 |
+
try:
|
| 953 |
+
measure_view_idx = (
|
| 954 |
+
int(measure_view_selector.split()[1]) - 1
|
| 955 |
+
if measure_view_selector
|
| 956 |
+
else 0
|
| 957 |
+
)
|
| 958 |
+
except:
|
| 959 |
+
measure_view_idx = 0
|
| 960 |
+
|
| 961 |
+
# Update all views with new filtered data
|
| 962 |
+
depth_vis = update_depth_view(new_processed_data, depth_view_idx)
|
| 963 |
+
normal_vis = update_normal_view(new_processed_data, normal_view_idx)
|
| 964 |
+
measure_img, _ = update_measure_view(new_processed_data, measure_view_idx)
|
| 965 |
+
|
| 966 |
+
return new_processed_data, depth_vis, normal_vis, measure_img, []
|
| 967 |
+
|
| 968 |
+
except Exception as e:
|
| 969 |
+
print(f"Error updating views on filter change: {e}")
|
| 970 |
+
return processed_data, None, None, None, []
|
| 971 |
+
|
| 972 |
+
|
| 973 |
# -------------------------------------------------------------------------
|
| 974 |
# Example scene functions
|
| 975 |
# -------------------------------------------------------------------------
|
|
|
|
| 1184 |
gr.Markdown("### Pointcloud options (live updates)")
|
| 1185 |
show_cam = gr.Checkbox(label="Show Camera", value=True)
|
| 1186 |
show_mesh = gr.Checkbox(label="Show mesh", value=True)
|
|
|
|
|
|
|
|
|
|
| 1187 |
filter_black_bg = gr.Checkbox(
|
| 1188 |
label="Filter Black Background", value=False
|
| 1189 |
)
|
|
|
|
| 1194 |
apply_mask_checkbox = gr.Checkbox(
|
| 1195 |
label="Apply non-ambiguous mask", value=True
|
| 1196 |
)
|
|
|
|
| 1197 |
# ---------------------- Example Scenes Section ----------------------
|
| 1198 |
gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
|
| 1199 |
gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
|
|
|
|
| 1256 |
target_dir_output,
|
| 1257 |
frame_filter,
|
| 1258 |
show_cam,
|
|
|
|
| 1259 |
filter_black_bg,
|
| 1260 |
filter_white_bg,
|
| 1261 |
apply_mask_checkbox,
|
| 1262 |
+
show_mesh,
|
| 1263 |
],
|
| 1264 |
outputs=[
|
| 1265 |
reconstruction_output,
|
|
|
|
| 1290 |
frame_filter,
|
| 1291 |
show_cam,
|
| 1292 |
is_example,
|
| 1293 |
+
filter_black_bg,
|
| 1294 |
+
filter_white_bg,
|
| 1295 |
+
show_mesh,
|
| 1296 |
],
|
| 1297 |
[reconstruction_output, log_output],
|
| 1298 |
)
|
|
|
|
| 1306 |
],
|
| 1307 |
[reconstruction_output, log_output],
|
| 1308 |
)
|
| 1309 |
+
filter_black_bg.change(
|
| 1310 |
update_visualization,
|
| 1311 |
[
|
| 1312 |
target_dir_output,
|
| 1313 |
frame_filter,
|
| 1314 |
show_cam,
|
| 1315 |
is_example,
|
|
|
|
| 1316 |
filter_black_bg,
|
| 1317 |
filter_white_bg,
|
| 1318 |
],
|
| 1319 |
[reconstruction_output, log_output],
|
| 1320 |
+
).then(
|
| 1321 |
+
fn=update_all_views_on_filter_change,
|
| 1322 |
+
inputs=[
|
|
|
|
| 1323 |
target_dir_output,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1324 |
filter_black_bg,
|
| 1325 |
filter_white_bg,
|
| 1326 |
+
processed_data_state,
|
| 1327 |
+
depth_view_selector,
|
| 1328 |
+
normal_view_selector,
|
| 1329 |
+
measure_view_selector,
|
| 1330 |
+
],
|
| 1331 |
+
outputs=[
|
| 1332 |
+
processed_data_state,
|
| 1333 |
+
depth_map,
|
| 1334 |
+
normal_map,
|
| 1335 |
+
measure_image,
|
| 1336 |
+
measure_points_state,
|
| 1337 |
],
|
|
|
|
| 1338 |
)
|
| 1339 |
filter_white_bg.change(
|
| 1340 |
update_visualization,
|
|
|
|
| 1343 |
frame_filter,
|
| 1344 |
show_cam,
|
| 1345 |
is_example,
|
|
|
|
| 1346 |
filter_black_bg,
|
| 1347 |
filter_white_bg,
|
| 1348 |
show_mesh,
|
| 1349 |
],
|
| 1350 |
[reconstruction_output, log_output],
|
| 1351 |
+
).then(
|
| 1352 |
+
fn=update_all_views_on_filter_change,
|
| 1353 |
+
inputs=[
|
| 1354 |
+
target_dir_output,
|
| 1355 |
+
filter_black_bg,
|
| 1356 |
+
filter_white_bg,
|
| 1357 |
+
processed_data_state,
|
| 1358 |
+
depth_view_selector,
|
| 1359 |
+
normal_view_selector,
|
| 1360 |
+
measure_view_selector,
|
| 1361 |
+
],
|
| 1362 |
+
outputs=[
|
| 1363 |
+
processed_data_state,
|
| 1364 |
+
depth_map,
|
| 1365 |
+
normal_map,
|
| 1366 |
+
measure_image,
|
| 1367 |
+
measure_points_state,
|
| 1368 |
+
],
|
| 1369 |
)
|
| 1370 |
|
| 1371 |
show_mesh.change(
|
|
|
|
| 1375 |
frame_filter,
|
| 1376 |
show_cam,
|
| 1377 |
is_example,
|
|
|
|
| 1378 |
filter_black_bg,
|
| 1379 |
filter_white_bg,
|
| 1380 |
show_mesh,
|
mapanything/utils/hf_utils/visual_util.py
CHANGED
|
@@ -107,13 +107,13 @@ def image_mesh(
|
|
| 107 |
*vertex_attrs (np.ndarray): vertex attributes in corresponding order with input image_attrs
|
| 108 |
indices (np.ndarray, optional): indices of vertices in the original mesh
|
| 109 |
"""
|
| 110 |
-
assert (len(image_attrs) > 0) or (
|
| 111 |
-
|
| 112 |
-
)
|
| 113 |
height, width = next(image_attrs).shape[:2] if mask is None else mask.shape
|
| 114 |
-
assert all(
|
| 115 |
-
|
| 116 |
-
)
|
| 117 |
|
| 118 |
row_faces = np.stack(
|
| 119 |
[
|
|
@@ -151,14 +151,10 @@ def image_mesh(
|
|
| 151 |
|
| 152 |
def predictions_to_glb(
|
| 153 |
predictions,
|
| 154 |
-
conf_thres=50.0,
|
| 155 |
filter_by_frames="all",
|
| 156 |
mask_black_bg=False,
|
| 157 |
mask_white_bg=False,
|
| 158 |
show_cam=True,
|
| 159 |
-
mask_sky=False,
|
| 160 |
-
target_dir=None,
|
| 161 |
-
prediction_mode="Predicted Pointmap",
|
| 162 |
mask_ambiguous=False,
|
| 163 |
as_mesh=True,
|
| 164 |
) -> trimesh.Scene:
|
|
@@ -168,17 +164,12 @@ def predictions_to_glb(
|
|
| 168 |
Args:
|
| 169 |
predictions (dict): Dictionary containing model predictions with keys:
|
| 170 |
- world_points: 3D point coordinates (S, H, W, 3)
|
| 171 |
-
- world_points_conf: Confidence scores (S, H, W)
|
| 172 |
- images: Input images (S, H, W, 3)
|
| 173 |
- extrinsic: Camera extrinsic matrices (S, 3, 4)
|
| 174 |
-
conf_thres (float): Percentage of low-confidence points to filter out (default: 50.0)
|
| 175 |
filter_by_frames (str): Frame filter specification (default: "all")
|
| 176 |
mask_black_bg (bool): Mask out black background pixels (default: False)
|
| 177 |
mask_white_bg (bool): Mask out white background pixels (default: False)
|
| 178 |
show_cam (bool): Include camera visualization (default: True)
|
| 179 |
-
mask_sky (bool): Apply sky segmentation mask (default: False)
|
| 180 |
-
target_dir (str): Output directory for intermediate files (default: None)
|
| 181 |
-
prediction_mode (str): Prediction mode selector (default: "Predicted Pointmap")
|
| 182 |
mask_ambiguous (bool): Apply final mask to filter ambiguous predictions (default: False)
|
| 183 |
as_mesh (bool): Represent the data as a mesh instead of point cloud (default: False)
|
| 184 |
|
|
@@ -191,9 +182,6 @@ def predictions_to_glb(
|
|
| 191 |
if not isinstance(predictions, dict):
|
| 192 |
raise ValueError("predictions must be a dictionary")
|
| 193 |
|
| 194 |
-
if conf_thres is None:
|
| 195 |
-
conf_thres = 10.0
|
| 196 |
-
|
| 197 |
print("Building GLB scene")
|
| 198 |
selected_frame_idx = None
|
| 199 |
if filter_by_frames != "all" and filter_by_frames != "All":
|
|
@@ -203,95 +191,23 @@ def predictions_to_glb(
|
|
| 203 |
except (ValueError, IndexError):
|
| 204 |
pass
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
pred_world_points = predictions[
|
| 214 |
-
"world_points"
|
| 215 |
-
] # No batch dimension to remove
|
| 216 |
-
pred_world_points_conf = predictions.get(
|
| 217 |
-
"confidence", np.ones_like(pred_world_points[..., 0])
|
| 218 |
-
)
|
| 219 |
-
else:
|
| 220 |
-
print(
|
| 221 |
-
"Warning: world_points not found in predictions, falling back to depth-based points"
|
| 222 |
-
)
|
| 223 |
-
pred_world_points = predictions["world_points_from_depth"]
|
| 224 |
-
pred_world_points_conf = predictions.get(
|
| 225 |
-
"depth_conf", np.ones_like(pred_world_points[..., 0])
|
| 226 |
-
)
|
| 227 |
-
else:
|
| 228 |
-
print("Using Depthmap and Camera Branch")
|
| 229 |
-
pred_world_points = predictions["world_points_from_depth"]
|
| 230 |
-
pred_world_points_conf = predictions.get(
|
| 231 |
-
"depth_conf", np.ones_like(pred_world_points[..., 0])
|
| 232 |
)
|
| 233 |
|
|
|
|
|
|
|
| 234 |
# Get images from predictions
|
| 235 |
images = predictions["images"]
|
| 236 |
# Use extrinsic matrices instead of pred_extrinsic_list
|
| 237 |
camera_matrices = predictions["extrinsic"]
|
| 238 |
|
| 239 |
-
if mask_sky:
|
| 240 |
-
if target_dir is not None:
|
| 241 |
-
import onnxruntime
|
| 242 |
-
|
| 243 |
-
skyseg_session = None
|
| 244 |
-
target_dir_images = target_dir + "/images"
|
| 245 |
-
image_list = sorted(os.listdir(target_dir_images))
|
| 246 |
-
sky_mask_list = []
|
| 247 |
-
|
| 248 |
-
# Get the shape of pred_world_points_conf to match
|
| 249 |
-
S, H, W = (
|
| 250 |
-
pred_world_points_conf.shape
|
| 251 |
-
if hasattr(pred_world_points_conf, "shape")
|
| 252 |
-
else (len(images), images.shape[1], images.shape[2])
|
| 253 |
-
)
|
| 254 |
-
|
| 255 |
-
# Download skyseg.onnx if it doesn't exist
|
| 256 |
-
if not os.path.exists("skyseg.onnx"):
|
| 257 |
-
print("Downloading skyseg.onnx...")
|
| 258 |
-
download_file_from_url(
|
| 259 |
-
"https://huggingface.co/JianyuanWang/skyseg/resolve/main/skyseg.onnx",
|
| 260 |
-
"skyseg.onnx",
|
| 261 |
-
)
|
| 262 |
-
|
| 263 |
-
for i, image_name in enumerate(image_list):
|
| 264 |
-
image_filepath = os.path.join(target_dir_images, image_name)
|
| 265 |
-
mask_filepath = os.path.join(target_dir, "sky_masks", image_name)
|
| 266 |
-
|
| 267 |
-
# Check if mask already exists
|
| 268 |
-
if os.path.exists(mask_filepath):
|
| 269 |
-
# Load existing mask
|
| 270 |
-
sky_mask = cv2.imread(mask_filepath, cv2.IMREAD_GRAYSCALE)
|
| 271 |
-
else:
|
| 272 |
-
# Generate new mask
|
| 273 |
-
if skyseg_session is None:
|
| 274 |
-
skyseg_session = onnxruntime.InferenceSession("skyseg.onnx")
|
| 275 |
-
sky_mask = segment_sky(
|
| 276 |
-
image_filepath, skyseg_session, mask_filepath
|
| 277 |
-
)
|
| 278 |
-
|
| 279 |
-
# Resize mask to match H×W if needed
|
| 280 |
-
if sky_mask.shape[0] != H or sky_mask.shape[1] != W:
|
| 281 |
-
sky_mask = cv2.resize(sky_mask, (W, H))
|
| 282 |
-
|
| 283 |
-
sky_mask_list.append(sky_mask)
|
| 284 |
-
|
| 285 |
-
# Convert list to numpy array with shape S×H×W
|
| 286 |
-
sky_mask_array = np.array(sky_mask_list)
|
| 287 |
-
|
| 288 |
-
# Apply sky mask to confidence scores
|
| 289 |
-
sky_mask_binary = (sky_mask_array > 0.1).astype(np.float32)
|
| 290 |
-
pred_world_points_conf = pred_world_points_conf * sky_mask_binary
|
| 291 |
-
|
| 292 |
if selected_frame_idx is not None:
|
| 293 |
pred_world_points = pred_world_points[selected_frame_idx][None]
|
| 294 |
-
pred_world_points_conf = pred_world_points_conf[selected_frame_idx][None]
|
| 295 |
images = images[selected_frame_idx][None]
|
| 296 |
camera_matrices = camera_matrices[selected_frame_idx][None]
|
| 297 |
|
|
@@ -303,36 +219,30 @@ def predictions_to_glb(
|
|
| 303 |
colors_rgb = images
|
| 304 |
colors_rgb = (colors_rgb.reshape(-1, 3) * 255).astype(np.uint8)
|
| 305 |
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
if conf_thres == 0.0:
|
| 309 |
-
conf_threshold = 0.0
|
| 310 |
-
else:
|
| 311 |
-
conf_threshold = np.percentile(conf, conf_thres)
|
| 312 |
-
|
| 313 |
-
conf_mask = (conf >= conf_threshold) & (conf > 1e-5)
|
| 314 |
final_mask = predictions["final_mask"].reshape(-1)
|
| 315 |
|
| 316 |
if mask_black_bg:
|
| 317 |
-
black_bg_mask = colors_rgb.sum(axis=1) >= 16
|
| 318 |
-
|
| 319 |
|
| 320 |
if mask_white_bg:
|
| 321 |
# Filter out white background pixels (RGB values close to white)
|
| 322 |
# Consider pixels white if all RGB values are above 240
|
| 323 |
white_bg_mask = (
|
| 324 |
-
(colors_rgb[:, 0] > 240
|
| 325 |
-
& (colors_rgb[:, 1] > 240
|
| 326 |
-
& (colors_rgb[:, 2] > 240
|
| 327 |
)
|
| 328 |
-
|
| 329 |
|
| 330 |
-
# Use final_mask
|
| 331 |
if mask_ambiguous:
|
| 332 |
-
|
| 333 |
|
| 334 |
-
vertices_3d = vertices_3d[
|
| 335 |
-
colors_rgb = colors_rgb[
|
| 336 |
|
| 337 |
if vertices_3d is None or np.asarray(vertices_3d).size == 0:
|
| 338 |
vertices_3d = np.array([[1, 0, 0]])
|
|
@@ -368,16 +278,13 @@ def predictions_to_glb(
|
|
| 368 |
else: # Assume already in HWC format
|
| 369 |
original_image_colors = images[0]
|
| 370 |
original_image_colors *= 255
|
| 371 |
-
#
|
| 372 |
-
original_conf = pred_world_points_conf.reshape(H, W)
|
| 373 |
original_final_mask = predictions["final_mask"][selected_frame_idx].reshape(
|
| 374 |
H, W
|
| 375 |
)
|
| 376 |
|
| 377 |
-
#
|
| 378 |
-
mask =
|
| 379 |
-
if mask_ambiguous:
|
| 380 |
-
mask = mask & original_final_mask
|
| 381 |
|
| 382 |
# Additional background masks if needed
|
| 383 |
if mask_black_bg:
|
|
@@ -407,29 +314,28 @@ def predictions_to_glb(
|
|
| 407 |
original_points * np.array([1, -1, 1], dtype=np.float32),
|
| 408 |
original_image_colors / 255.0,
|
| 409 |
frame_normals * np.array([1, -1, 1], dtype=np.float32),
|
| 410 |
-
mask=
|
| 411 |
tri=True,
|
| 412 |
return_indices=False,
|
| 413 |
)
|
| 414 |
|
| 415 |
# Apply coordinate transformations to normals
|
| 416 |
vertex_normals = vertex_normals * np.array([1, -1, 1], dtype=np.float32)
|
| 417 |
-
# frame_normals = frame_normals * np.array([1, -1, 1], dtype=np.float32)
|
| 418 |
else:
|
| 419 |
# Create faces and vertices using image_mesh without normals
|
| 420 |
faces, vertices, vertex_colors = image_mesh(
|
| 421 |
original_points * np.array([1, -1, 1], dtype=np.float32),
|
| 422 |
original_image_colors / 255.0,
|
| 423 |
-
mask=
|
| 424 |
tri=True,
|
| 425 |
return_indices=False,
|
| 426 |
)
|
| 427 |
|
| 428 |
-
vertices = vertices * np.array([1, -1, 1], dtype=np.float32)
|
| 429 |
|
| 430 |
# Create trimesh object with optional normals
|
| 431 |
mesh_data = trimesh.Trimesh(
|
| 432 |
-
vertices=vertices,
|
| 433 |
faces=faces,
|
| 434 |
vertex_colors=(vertex_colors * 255).astype(np.uint8),
|
| 435 |
vertex_normals=(vertex_normals if vertex_normals is not None else None),
|
|
@@ -446,7 +352,6 @@ def predictions_to_glb(
|
|
| 446 |
|
| 447 |
# Get data for this frame
|
| 448 |
frame_points = pred_world_points[frame_idx]
|
| 449 |
-
frame_conf = pred_world_points_conf[frame_idx]
|
| 450 |
frame_final_mask = predictions["final_mask"][frame_idx]
|
| 451 |
|
| 452 |
# Get frame image
|
|
@@ -455,16 +360,27 @@ def predictions_to_glb(
|
|
| 455 |
else: # Assume already in HWC format
|
| 456 |
frame_image = images[frame_idx]
|
| 457 |
frame_image *= 255
|
| 458 |
-
# Create mask for this frame
|
| 459 |
-
mask =
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
# Create mesh for this frame
|
| 464 |
faces, vertices, vertex_colors = image_mesh(
|
| 465 |
frame_points * np.array([1, -1, 1], dtype=np.float32),
|
| 466 |
frame_image / 255.0,
|
| 467 |
-
mask=
|
| 468 |
tri=True,
|
| 469 |
return_indices=False,
|
| 470 |
)
|
|
@@ -484,9 +400,6 @@ def predictions_to_glb(
|
|
| 484 |
|
| 485 |
# Prepare 4x4 matrices for camera extrinsics
|
| 486 |
num_cameras = len(camera_matrices)
|
| 487 |
-
# extrinsics_matrices = np.zeros((num_cameras, 4, 4))
|
| 488 |
-
# extrinsics_matrices[:, :3, :4] = camera_matrices
|
| 489 |
-
# extrinsics_matrices[:, 3, 3] = 1
|
| 490 |
|
| 491 |
if show_cam:
|
| 492 |
# Add camera models to the scene
|
|
@@ -497,7 +410,7 @@ def predictions_to_glb(
|
|
| 497 |
current_color = tuple(int(255 * x) for x in rgba_color[:3])
|
| 498 |
|
| 499 |
integrate_camera_into_scene(
|
| 500 |
-
scene_3d,
|
| 501 |
)
|
| 502 |
|
| 503 |
# Align scene to the observation of the first camera
|
|
|
|
| 107 |
*vertex_attrs (np.ndarray): vertex attributes in corresponding order with input image_attrs
|
| 108 |
indices (np.ndarray, optional): indices of vertices in the original mesh
|
| 109 |
"""
|
| 110 |
+
assert (len(image_attrs) > 0) or (
|
| 111 |
+
mask is not None
|
| 112 |
+
), "At least one of image_attrs or mask should be provided"
|
| 113 |
height, width = next(image_attrs).shape[:2] if mask is None else mask.shape
|
| 114 |
+
assert all(
|
| 115 |
+
img.shape[:2] == (height, width) for img in image_attrs
|
| 116 |
+
), "All image_attrs should have the same shape"
|
| 117 |
|
| 118 |
row_faces = np.stack(
|
| 119 |
[
|
|
|
|
| 151 |
|
| 152 |
def predictions_to_glb(
|
| 153 |
predictions,
|
|
|
|
| 154 |
filter_by_frames="all",
|
| 155 |
mask_black_bg=False,
|
| 156 |
mask_white_bg=False,
|
| 157 |
show_cam=True,
|
|
|
|
|
|
|
|
|
|
| 158 |
mask_ambiguous=False,
|
| 159 |
as_mesh=True,
|
| 160 |
) -> trimesh.Scene:
|
|
|
|
| 164 |
Args:
|
| 165 |
predictions (dict): Dictionary containing model predictions with keys:
|
| 166 |
- world_points: 3D point coordinates (S, H, W, 3)
|
|
|
|
| 167 |
- images: Input images (S, H, W, 3)
|
| 168 |
- extrinsic: Camera extrinsic matrices (S, 3, 4)
|
|
|
|
| 169 |
filter_by_frames (str): Frame filter specification (default: "all")
|
| 170 |
mask_black_bg (bool): Mask out black background pixels (default: False)
|
| 171 |
mask_white_bg (bool): Mask out white background pixels (default: False)
|
| 172 |
show_cam (bool): Include camera visualization (default: True)
|
|
|
|
|
|
|
|
|
|
| 173 |
mask_ambiguous (bool): Apply final mask to filter ambiguous predictions (default: False)
|
| 174 |
as_mesh (bool): Represent the data as a mesh instead of point cloud (default: False)
|
| 175 |
|
|
|
|
| 182 |
if not isinstance(predictions, dict):
|
| 183 |
raise ValueError("predictions must be a dictionary")
|
| 184 |
|
|
|
|
|
|
|
|
|
|
| 185 |
print("Building GLB scene")
|
| 186 |
selected_frame_idx = None
|
| 187 |
if filter_by_frames != "all" and filter_by_frames != "All":
|
|
|
|
| 191 |
except (ValueError, IndexError):
|
| 192 |
pass
|
| 193 |
|
| 194 |
+
# Always use Pointmap Branch
|
| 195 |
+
print("Using Pointmap Branch")
|
| 196 |
+
if "world_points" not in predictions:
|
| 197 |
+
raise ValueError(
|
| 198 |
+
"world_points not found in predictions. Pointmap Branch requires 'world_points' key. "
|
| 199 |
+
"Depthmap and Camera branches have been removed."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
)
|
| 201 |
|
| 202 |
+
pred_world_points = predictions["world_points"]
|
| 203 |
+
|
| 204 |
# Get images from predictions
|
| 205 |
images = predictions["images"]
|
| 206 |
# Use extrinsic matrices instead of pred_extrinsic_list
|
| 207 |
camera_matrices = predictions["extrinsic"]
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
if selected_frame_idx is not None:
|
| 210 |
pred_world_points = pred_world_points[selected_frame_idx][None]
|
|
|
|
| 211 |
images = images[selected_frame_idx][None]
|
| 212 |
camera_matrices = camera_matrices[selected_frame_idx][None]
|
| 213 |
|
|
|
|
| 219 |
colors_rgb = images
|
| 220 |
colors_rgb = (colors_rgb.reshape(-1, 3) * 255).astype(np.uint8)
|
| 221 |
|
| 222 |
+
# Create mask for filtering
|
| 223 |
+
mask = np.ones(len(vertices_3d), dtype=bool)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
final_mask = predictions["final_mask"].reshape(-1)
|
| 225 |
|
| 226 |
if mask_black_bg:
|
| 227 |
+
black_bg_mask = colors_rgb.sum(axis=1) >= 16
|
| 228 |
+
mask = mask & black_bg_mask
|
| 229 |
|
| 230 |
if mask_white_bg:
|
| 231 |
# Filter out white background pixels (RGB values close to white)
|
| 232 |
# Consider pixels white if all RGB values are above 240
|
| 233 |
white_bg_mask = (
|
| 234 |
+
(colors_rgb[:, 0] > 240)
|
| 235 |
+
& (colors_rgb[:, 1] > 240)
|
| 236 |
+
& (colors_rgb[:, 2] > 240)
|
| 237 |
)
|
| 238 |
+
mask = mask & ~white_bg_mask
|
| 239 |
|
| 240 |
+
# Use final_mask when mask_ambiguous is checked
|
| 241 |
if mask_ambiguous:
|
| 242 |
+
mask = mask & final_mask
|
| 243 |
|
| 244 |
+
vertices_3d = vertices_3d[mask].copy()
|
| 245 |
+
colors_rgb = colors_rgb[mask].copy()
|
| 246 |
|
| 247 |
if vertices_3d is None or np.asarray(vertices_3d).size == 0:
|
| 248 |
vertices_3d = np.array([[1, 0, 0]])
|
|
|
|
| 278 |
else: # Assume already in HWC format
|
| 279 |
original_image_colors = images[0]
|
| 280 |
original_image_colors *= 255
|
| 281 |
+
# Get original final mask
|
|
|
|
| 282 |
original_final_mask = predictions["final_mask"][selected_frame_idx].reshape(
|
| 283 |
H, W
|
| 284 |
)
|
| 285 |
|
| 286 |
+
# Create mask based on final mask
|
| 287 |
+
mask = original_final_mask
|
|
|
|
|
|
|
| 288 |
|
| 289 |
# Additional background masks if needed
|
| 290 |
if mask_black_bg:
|
|
|
|
| 314 |
original_points * np.array([1, -1, 1], dtype=np.float32),
|
| 315 |
original_image_colors / 255.0,
|
| 316 |
frame_normals * np.array([1, -1, 1], dtype=np.float32),
|
| 317 |
+
mask=mask,
|
| 318 |
tri=True,
|
| 319 |
return_indices=False,
|
| 320 |
)
|
| 321 |
|
| 322 |
# Apply coordinate transformations to normals
|
| 323 |
vertex_normals = vertex_normals * np.array([1, -1, 1], dtype=np.float32)
|
|
|
|
| 324 |
else:
|
| 325 |
# Create faces and vertices using image_mesh without normals
|
| 326 |
faces, vertices, vertex_colors = image_mesh(
|
| 327 |
original_points * np.array([1, -1, 1], dtype=np.float32),
|
| 328 |
original_image_colors / 255.0,
|
| 329 |
+
mask=mask,
|
| 330 |
tri=True,
|
| 331 |
return_indices=False,
|
| 332 |
)
|
| 333 |
|
| 334 |
+
# vertices = vertices * np.array([1, -1, 1], dtype=np.float32)
|
| 335 |
|
| 336 |
# Create trimesh object with optional normals
|
| 337 |
mesh_data = trimesh.Trimesh(
|
| 338 |
+
vertices=vertices * np.array([1, -1, 1], dtype=np.float32),
|
| 339 |
faces=faces,
|
| 340 |
vertex_colors=(vertex_colors * 255).astype(np.uint8),
|
| 341 |
vertex_normals=(vertex_normals if vertex_normals is not None else None),
|
|
|
|
| 352 |
|
| 353 |
# Get data for this frame
|
| 354 |
frame_points = pred_world_points[frame_idx]
|
|
|
|
| 355 |
frame_final_mask = predictions["final_mask"][frame_idx]
|
| 356 |
|
| 357 |
# Get frame image
|
|
|
|
| 360 |
else: # Assume already in HWC format
|
| 361 |
frame_image = images[frame_idx]
|
| 362 |
frame_image *= 255
|
| 363 |
+
# Create mask for this frame using final_mask
|
| 364 |
+
mask = frame_final_mask
|
| 365 |
+
|
| 366 |
+
# Additional background masks if needed
|
| 367 |
+
if mask_black_bg:
|
| 368 |
+
black_bg_mask = frame_image.sum(axis=2) >= 16
|
| 369 |
+
mask = mask & black_bg_mask
|
| 370 |
+
|
| 371 |
+
if mask_white_bg:
|
| 372 |
+
white_bg_mask = ~(
|
| 373 |
+
(frame_image[:, :, 0] > 240)
|
| 374 |
+
& (frame_image[:, :, 1] > 240)
|
| 375 |
+
& (frame_image[:, :, 2] > 240)
|
| 376 |
+
)
|
| 377 |
+
mask = mask & white_bg_mask
|
| 378 |
|
| 379 |
# Create mesh for this frame
|
| 380 |
faces, vertices, vertex_colors = image_mesh(
|
| 381 |
frame_points * np.array([1, -1, 1], dtype=np.float32),
|
| 382 |
frame_image / 255.0,
|
| 383 |
+
mask=mask,
|
| 384 |
tri=True,
|
| 385 |
return_indices=False,
|
| 386 |
)
|
|
|
|
| 400 |
|
| 401 |
# Prepare 4x4 matrices for camera extrinsics
|
| 402 |
num_cameras = len(camera_matrices)
|
|
|
|
|
|
|
|
|
|
| 403 |
|
| 404 |
if show_cam:
|
| 405 |
# Add camera models to the scene
|
|
|
|
| 410 |
current_color = tuple(int(255 * x) for x in rgba_color[:3])
|
| 411 |
|
| 412 |
integrate_camera_into_scene(
|
| 413 |
+
scene_3d, world_to_camera, current_color, scene_scale
|
| 414 |
)
|
| 415 |
|
| 416 |
# Align scene to the observation of the first camera
|