aknapitsch user committed on
Commit
9f367f7
·
1 Parent(s): 9a006f8

finished mesh view

Browse files
Files changed (2) hide show
  1. app.py +165 -110
  2. mapanything/utils/hf_utils/visual_util.py +50 -137
app.py CHANGED
@@ -48,26 +48,6 @@ def get_logo_base64():
48
  return None
49
 
50
 
51
- # MapAnything Configuration
52
- # high_level_config = {
53
- # "path": "configs/train.yaml",
54
- # "hf_model_name": "facebook/MapAnything",
55
- # "model_str": "mapanything",
56
- # "config_overrides": [
57
- # "machine=aws",
58
- # "model=mapanything",
59
- # "model/task=images_only",
60
- # "model.encoder.uses_torch_hub=false",
61
- # ],
62
- # "checkpoint_name": "mapa_curri_24v_13d_48ipg_64g.pth",
63
- # "config_name": "config.json",
64
- # "trained_with_amp": True,
65
- # "trained_with_amp_dtype": "fp16",
66
- # "data_norm_type": "dinov2",
67
- # "patch_size": 14,
68
- # "resolution": 518,
69
- # }
70
-
71
  # MapAnything Configuration
72
  high_level_config = {
73
  "path": "configs/train.yaml",
@@ -96,7 +76,13 @@ model = None
96
  # 1) Core model inference
97
  # -------------------------------------------------------------------------
98
  @spaces.GPU(duration=120)
99
- def run_model(target_dir, apply_mask=True, mask_edges=True):
 
 
 
 
 
 
100
  """
101
  Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
102
  """
@@ -204,7 +190,7 @@ def run_model(target_dir, apply_mask=True, mask_edges=True):
204
 
205
  # Process data for visualization tabs (depth, normal, measure)
206
  processed_data = process_predictions_for_visualization(
207
- predictions, views, high_level_config
208
  )
209
 
210
  # Clean up
@@ -246,9 +232,7 @@ def update_depth_view(processed_data, view_index):
246
  if view_data is None or view_data["depth"] is None:
247
  return None
248
 
249
- # Use confidence filtering if available
250
- confidence = view_data.get("confidence")
251
- return colorize_depth(view_data["depth"], confidence=confidence)
252
 
253
 
254
  def update_normal_view(processed_data, view_index):
@@ -257,9 +241,7 @@ def update_normal_view(processed_data, view_index):
257
  if view_data is None or view_data["normal"] is None:
258
  return None
259
 
260
- # Use confidence filtering if available
261
- confidence = view_data.get("confidence")
262
- return colorize_normal(view_data["normal"], confidence=confidence)
263
 
264
 
265
  def update_measure_view(processed_data, view_index):
@@ -475,11 +457,11 @@ def gradio_demo(
475
  target_dir,
476
  frame_filter="All",
477
  show_cam=True,
478
- filter_sky=False,
479
  filter_black_bg=False,
480
  filter_white_bg=False,
481
  apply_mask=True,
482
  mask_edges=True,
 
483
  ):
484
  """
485
  Perform reconstruction using the already-created target_dir/images.
@@ -491,9 +473,6 @@ def gradio_demo(
491
  gc.collect()
492
  torch.cuda.empty_cache()
493
 
494
- # Always use Pointmap Branch for MapAnything
495
- prediction_mode = "Pointmap Branch"
496
-
497
  # Prepare frame_filter dropdown
498
  target_dir_images = os.path.join(target_dir, "images")
499
  all_files = (
@@ -519,7 +498,7 @@ def gradio_demo(
519
  # Build a GLB file name
520
  glbfile = os.path.join(
521
  target_dir,
522
- f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_sky{filter_sky}_black{filter_black_bg}_white{filter_white_bg}_pred{prediction_mode.replace(' ', '_')}.glb",
523
  )
524
 
525
  # Convert predictions to GLB
@@ -527,12 +506,9 @@ def gradio_demo(
527
  predictions,
528
  filter_by_frames=frame_filter,
529
  show_cam=show_cam,
530
- target_dir=target_dir,
531
- prediction_mode=prediction_mode,
532
- mask_sky=filter_sky,
533
  mask_black_bg=filter_black_bg,
534
  mask_white_bg=filter_white_bg,
535
- as_mesh=True, # Default to True for reconstruction
536
  )
537
  glbscene.export(file_obj=glbfile)
538
 
@@ -575,42 +551,19 @@ def gradio_demo(
575
  # -------------------------------------------------------------------------
576
  # 5) Helper functions for UI resets + re-visualization
577
  # -------------------------------------------------------------------------
578
- def apply_confidence_filtering(data, confidence, conf_thres):
579
- """Apply confidence filtering to data arrays"""
580
- if confidence is None or data is None:
581
- return data
582
-
583
- # Convert confidence threshold from percentage to confidence value
584
- conf_threshold = np.percentile(confidence, conf_thres)
585
- conf_mask = (confidence >= conf_threshold) & (confidence > 1e-5)
586
-
587
- # conf_mask = confidence >= (conf_thres)
588
-
589
- # Apply mask to data
590
- if len(data.shape) == 3: # 3D data (H, W, C)
591
- filtered_data = data.copy()
592
- for c in range(data.shape[2]):
593
- filtered_data[:, :, c] = np.where(conf_mask, data[:, :, c], 0)
594
- elif len(data.shape) == 2: # 2D data (H, W)
595
- filtered_data = np.where(conf_mask, data, 0)
596
- else:
597
- filtered_data = data
598
-
599
- return filtered_data
600
-
601
-
602
- def colorize_depth(depth_map, confidence=None, conf_thres=None):
603
- """Convert depth map to colorized visualization with optional confidence filtering"""
604
  if depth_map is None:
605
  return None
606
 
607
- # Apply confidence filtering if available
608
- if confidence is not None and conf_thres is not None:
609
- depth_map = apply_confidence_filtering(depth_map, confidence, conf_thres)
610
-
611
  # Normalize depth to 0-1 range
612
  depth_normalized = depth_map.copy()
613
  valid_mask = depth_normalized > 0
 
 
 
 
 
614
  if valid_mask.sum() > 0:
615
  valid_depths = depth_normalized[valid_mask]
616
  p5 = np.percentile(valid_depths, 5)
@@ -622,8 +575,6 @@ def colorize_depth(depth_map, confidence=None, conf_thres=None):
622
  import matplotlib.pyplot as plt
623
 
624
  colormap = plt.cm.turbo_r
625
- # colormap = plt.cm.plasma
626
- # colormap = plt.cm.viridis
627
  colored = colormap(depth_normalized)
628
  colored = (colored[:, :, :3] * 255).astype(np.uint8)
629
 
@@ -633,34 +584,36 @@ def colorize_depth(depth_map, confidence=None, conf_thres=None):
633
  return colored
634
 
635
 
636
- def colorize_normal(normal_map, confidence=None, conf_thres=None):
637
- """Convert normal map to colorized visualization with optional confidence filtering"""
638
  if normal_map is None:
639
  return None
640
 
641
- # Apply confidence filtering if available
642
- if confidence is not None and conf_thres is not None:
643
- normal_map = apply_confidence_filtering(normal_map, confidence, conf_thres)
 
 
 
 
644
 
645
  # Normalize normals to [0, 1] range for visualization
646
- normal_vis = (normal_map + 1.0) / 2.0
647
  normal_vis = (normal_vis * 255).astype(np.uint8)
648
 
649
  return normal_vis
650
 
651
 
652
- def process_predictions_for_visualization(predictions, views, high_level_config):
 
 
653
  """Extract depth, normal, and 3D points from predictions for visualization"""
654
  processed_data = {}
655
 
656
- # Check if confidence data is available in any view
657
- has_confidence_data = False
658
-
659
  # Process each view
660
  for view_idx, view in enumerate(views):
661
  # Get image
662
  image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
663
- # image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
664
 
665
  # Get predicted points
666
  pred_pts3d = predictions["world_points"][view_idx]
@@ -672,12 +625,32 @@ def process_predictions_for_visualization(predictions, views, high_level_config)
672
  "depth": None,
673
  "normal": None,
674
  "mask": None,
675
- "confidence": None,
676
- "has_confidence": has_confidence_data,
677
  }
678
 
679
- view_data["mask"] = predictions["final_mask"][view_idx]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
 
 
681
  view_data["depth"] = predictions["depth"][view_idx].squeeze()
682
 
683
  normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
@@ -872,7 +845,6 @@ def update_visualization(
872
  frame_filter,
873
  show_cam,
874
  is_example,
875
- filter_sky=False,
876
  filter_black_bg=False,
877
  filter_white_bg=False,
878
  show_mesh=True,
@@ -905,12 +877,9 @@ def update_visualization(
905
  loaded = np.load(predictions_path, allow_pickle=True)
906
  predictions = {key: loaded[key] for key in loaded.keys()}
907
 
908
- # Always use Pointmap Branch for MapAnything
909
- prediction_mode = "Pointmap Branch"
910
-
911
  glbfile = os.path.join(
912
  target_dir,
913
- f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_sky{filter_sky}_black{filter_black_bg}_white{filter_white_bg}_pred{prediction_mode.replace(' ', '_')}.glb",
914
  )
915
 
916
  if not os.path.exists(glbfile):
@@ -918,9 +887,6 @@ def update_visualization(
918
  predictions,
919
  filter_by_frames=frame_filter,
920
  show_cam=show_cam,
921
- target_dir=target_dir,
922
- prediction_mode=prediction_mode,
923
- mask_sky=filter_sky,
924
  mask_black_bg=filter_black_bg,
925
  mask_white_bg=filter_white_bg,
926
  as_mesh=show_mesh,
@@ -933,6 +899,77 @@ def update_visualization(
933
  )
934
 
935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
936
  # -------------------------------------------------------------------------
937
  # Example scene functions
938
  # -------------------------------------------------------------------------
@@ -1147,9 +1184,6 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1147
  gr.Markdown("### Pointcloud options (live updates)")
1148
  show_cam = gr.Checkbox(label="Show Camera", value=True)
1149
  show_mesh = gr.Checkbox(label="Show mesh", value=True)
1150
- filter_sky = gr.Checkbox(
1151
- label="Filter Sky (using skyseg.onnx)", value=False
1152
- )
1153
  filter_black_bg = gr.Checkbox(
1154
  label="Filter Black Background", value=False
1155
  )
@@ -1160,7 +1194,6 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1160
  apply_mask_checkbox = gr.Checkbox(
1161
  label="Apply non-ambiguous mask", value=True
1162
  )
1163
- mask_edges_checkbox = apply_mask_checkbox
1164
  # ---------------------- Example Scenes Section ----------------------
1165
  gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
1166
  gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
@@ -1223,11 +1256,10 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1223
  target_dir_output,
1224
  frame_filter,
1225
  show_cam,
1226
- filter_sky,
1227
  filter_black_bg,
1228
  filter_white_bg,
1229
  apply_mask_checkbox,
1230
- mask_edges_checkbox,
1231
  ],
1232
  outputs=[
1233
  reconstruction_output,
@@ -1258,6 +1290,9 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1258
  frame_filter,
1259
  show_cam,
1260
  is_example,
 
 
 
1261
  ],
1262
  [reconstruction_output, log_output],
1263
  )
@@ -1271,31 +1306,35 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1271
  ],
1272
  [reconstruction_output, log_output],
1273
  )
1274
- filter_sky.change(
1275
  update_visualization,
1276
  [
1277
  target_dir_output,
1278
  frame_filter,
1279
  show_cam,
1280
  is_example,
1281
- filter_sky,
1282
  filter_black_bg,
1283
  filter_white_bg,
1284
  ],
1285
  [reconstruction_output, log_output],
1286
- )
1287
- filter_black_bg.change(
1288
- update_visualization,
1289
- [
1290
  target_dir_output,
1291
- frame_filter,
1292
- show_cam,
1293
- is_example,
1294
- filter_sky,
1295
  filter_black_bg,
1296
  filter_white_bg,
 
 
 
 
 
 
 
 
 
 
 
1297
  ],
1298
- [reconstruction_output, log_output],
1299
  )
1300
  filter_white_bg.change(
1301
  update_visualization,
@@ -1304,12 +1343,29 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1304
  frame_filter,
1305
  show_cam,
1306
  is_example,
1307
- filter_sky,
1308
  filter_black_bg,
1309
  filter_white_bg,
1310
  show_mesh,
1311
  ],
1312
  [reconstruction_output, log_output],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1313
  )
1314
 
1315
  show_mesh.change(
@@ -1319,7 +1375,6 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1319
  frame_filter,
1320
  show_cam,
1321
  is_example,
1322
- filter_sky,
1323
  filter_black_bg,
1324
  filter_white_bg,
1325
  show_mesh,
 
48
  return None
49
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # MapAnything Configuration
52
  high_level_config = {
53
  "path": "configs/train.yaml",
 
76
  # 1) Core model inference
77
  # -------------------------------------------------------------------------
78
  @spaces.GPU(duration=120)
79
+ def run_model(
80
+ target_dir,
81
+ apply_mask=True,
82
+ mask_edges=True,
83
+ filter_black_bg=False,
84
+ filter_white_bg=False,
85
+ ):
86
  """
87
  Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
88
  """
 
190
 
191
  # Process data for visualization tabs (depth, normal, measure)
192
  processed_data = process_predictions_for_visualization(
193
+ predictions, views, high_level_config, filter_black_bg, filter_white_bg
194
  )
195
 
196
  # Clean up
 
232
  if view_data is None or view_data["depth"] is None:
233
  return None
234
 
235
+ return colorize_depth(view_data["depth"], mask=view_data.get("mask"))
 
 
236
 
237
 
238
  def update_normal_view(processed_data, view_index):
 
241
  if view_data is None or view_data["normal"] is None:
242
  return None
243
 
244
+ return colorize_normal(view_data["normal"], mask=view_data.get("mask"))
 
 
245
 
246
 
247
  def update_measure_view(processed_data, view_index):
 
457
  target_dir,
458
  frame_filter="All",
459
  show_cam=True,
 
460
  filter_black_bg=False,
461
  filter_white_bg=False,
462
  apply_mask=True,
463
  mask_edges=True,
464
+ show_mesh=True,
465
  ):
466
  """
467
  Perform reconstruction using the already-created target_dir/images.
 
473
  gc.collect()
474
  torch.cuda.empty_cache()
475
 
 
 
 
476
  # Prepare frame_filter dropdown
477
  target_dir_images = os.path.join(target_dir, "images")
478
  all_files = (
 
498
  # Build a GLB file name
499
  glbfile = os.path.join(
500
  target_dir,
501
+ f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
502
  )
503
 
504
  # Convert predictions to GLB
 
506
  predictions,
507
  filter_by_frames=frame_filter,
508
  show_cam=show_cam,
 
 
 
509
  mask_black_bg=filter_black_bg,
510
  mask_white_bg=filter_white_bg,
511
+ as_mesh=show_mesh, # Use the show_mesh parameter
512
  )
513
  glbscene.export(file_obj=glbfile)
514
 
 
551
  # -------------------------------------------------------------------------
552
  # 5) Helper functions for UI resets + re-visualization
553
  # -------------------------------------------------------------------------
554
+ def colorize_depth(depth_map, mask=None):
555
+ """Convert depth map to colorized visualization with optional mask"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
556
  if depth_map is None:
557
  return None
558
 
 
 
 
 
559
  # Normalize depth to 0-1 range
560
  depth_normalized = depth_map.copy()
561
  valid_mask = depth_normalized > 0
562
+
563
+ # Apply additional mask if provided (for background filtering)
564
+ if mask is not None:
565
+ valid_mask = valid_mask & mask
566
+
567
  if valid_mask.sum() > 0:
568
  valid_depths = depth_normalized[valid_mask]
569
  p5 = np.percentile(valid_depths, 5)
 
575
  import matplotlib.pyplot as plt
576
 
577
  colormap = plt.cm.turbo_r
 
 
578
  colored = colormap(depth_normalized)
579
  colored = (colored[:, :, :3] * 255).astype(np.uint8)
580
 
 
584
  return colored
585
 
586
 
587
+ def colorize_normal(normal_map, mask=None):
588
+ """Convert normal map to colorized visualization with optional mask"""
589
  if normal_map is None:
590
  return None
591
 
592
+ # Create a copy for modification
593
+ normal_vis = normal_map.copy()
594
+
595
+ # Apply mask if provided (set masked areas to [0, 0, 0] which becomes grey after normalization)
596
+ if mask is not None:
597
+ invalid_mask = ~mask
598
+ normal_vis[invalid_mask] = [0, 0, 0] # Set invalid areas to zero
599
 
600
  # Normalize normals to [0, 1] range for visualization
601
+ normal_vis = (normal_vis + 1.0) / 2.0
602
  normal_vis = (normal_vis * 255).astype(np.uint8)
603
 
604
  return normal_vis
605
 
606
 
607
+ def process_predictions_for_visualization(
608
+ predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False
609
+ ):
610
  """Extract depth, normal, and 3D points from predictions for visualization"""
611
  processed_data = {}
612
 
 
 
 
613
  # Process each view
614
  for view_idx, view in enumerate(views):
615
  # Get image
616
  image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
 
617
 
618
  # Get predicted points
619
  pred_pts3d = predictions["world_points"][view_idx]
 
625
  "depth": None,
626
  "normal": None,
627
  "mask": None,
 
 
628
  }
629
 
630
+ # Start with the final mask from predictions
631
+ mask = predictions["final_mask"][view_idx].copy()
632
+
633
+ # Apply black background filtering if enabled
634
+ if filter_black_bg:
635
+ # Get the image colors (ensure they're in 0-255 range)
636
+ view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
637
+ # Filter out black background pixels (sum of RGB < 16)
638
+ black_bg_mask = view_colors.sum(axis=2) >= 16
639
+ mask = mask & black_bg_mask
640
+
641
+ # Apply white background filtering if enabled
642
+ if filter_white_bg:
643
+ # Get the image colors (ensure they're in 0-255 range)
644
+ view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
645
+ # Filter out white background pixels (all RGB > 240)
646
+ white_bg_mask = ~(
647
+ (view_colors[:, :, 0] > 240)
648
+ & (view_colors[:, :, 1] > 240)
649
+ & (view_colors[:, :, 2] > 240)
650
+ )
651
+ mask = mask & white_bg_mask
652
 
653
+ view_data["mask"] = mask
654
  view_data["depth"] = predictions["depth"][view_idx].squeeze()
655
 
656
  normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
 
845
  frame_filter,
846
  show_cam,
847
  is_example,
 
848
  filter_black_bg=False,
849
  filter_white_bg=False,
850
  show_mesh=True,
 
877
  loaded = np.load(predictions_path, allow_pickle=True)
878
  predictions = {key: loaded[key] for key in loaded.keys()}
879
 
 
 
 
880
  glbfile = os.path.join(
881
  target_dir,
882
+ f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
883
  )
884
 
885
  if not os.path.exists(glbfile):
 
887
  predictions,
888
  filter_by_frames=frame_filter,
889
  show_cam=show_cam,
 
 
 
890
  mask_black_bg=filter_black_bg,
891
  mask_white_bg=filter_white_bg,
892
  as_mesh=show_mesh,
 
899
  )
900
 
901
 
902
+ def update_all_views_on_filter_change(
903
+ target_dir,
904
+ filter_black_bg,
905
+ filter_white_bg,
906
+ processed_data,
907
+ depth_view_selector,
908
+ normal_view_selector,
909
+ measure_view_selector,
910
+ ):
911
+ """
912
+ Update all individual view tabs when background filtering checkboxes change.
913
+ This regenerates the processed data with new filtering and updates all views.
914
+ """
915
+ # Check if we have a valid target directory and predictions
916
+ if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
917
+ return processed_data, None, None, None, []
918
+
919
+ predictions_path = os.path.join(target_dir, "predictions.npz")
920
+ if not os.path.exists(predictions_path):
921
+ return processed_data, None, None, None, []
922
+
923
+ try:
924
+ # Load the original predictions and views
925
+ loaded = np.load(predictions_path, allow_pickle=True)
926
+ predictions = {key: loaded[key] for key in loaded.keys()}
927
+
928
+ # Load images using MapAnything's load_images function
929
+ image_folder_path = os.path.join(target_dir, "images")
930
+ views = load_images(image_folder_path)
931
+
932
+ # Regenerate processed data with new filtering settings
933
+ new_processed_data = process_predictions_for_visualization(
934
+ predictions, views, high_level_config, filter_black_bg, filter_white_bg
935
+ )
936
+
937
+ # Get current view indices
938
+ try:
939
+ depth_view_idx = (
940
+ int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
941
+ )
942
+ except:
943
+ depth_view_idx = 0
944
+
945
+ try:
946
+ normal_view_idx = (
947
+ int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
948
+ )
949
+ except:
950
+ normal_view_idx = 0
951
+
952
+ try:
953
+ measure_view_idx = (
954
+ int(measure_view_selector.split()[1]) - 1
955
+ if measure_view_selector
956
+ else 0
957
+ )
958
+ except:
959
+ measure_view_idx = 0
960
+
961
+ # Update all views with new filtered data
962
+ depth_vis = update_depth_view(new_processed_data, depth_view_idx)
963
+ normal_vis = update_normal_view(new_processed_data, normal_view_idx)
964
+ measure_img, _ = update_measure_view(new_processed_data, measure_view_idx)
965
+
966
+ return new_processed_data, depth_vis, normal_vis, measure_img, []
967
+
968
+ except Exception as e:
969
+ print(f"Error updating views on filter change: {e}")
970
+ return processed_data, None, None, None, []
971
+
972
+
973
  # -------------------------------------------------------------------------
974
  # Example scene functions
975
  # -------------------------------------------------------------------------
 
1184
  gr.Markdown("### Pointcloud options (live updates)")
1185
  show_cam = gr.Checkbox(label="Show Camera", value=True)
1186
  show_mesh = gr.Checkbox(label="Show mesh", value=True)
 
 
 
1187
  filter_black_bg = gr.Checkbox(
1188
  label="Filter Black Background", value=False
1189
  )
 
1194
  apply_mask_checkbox = gr.Checkbox(
1195
  label="Apply non-ambiguous mask", value=True
1196
  )
 
1197
  # ---------------------- Example Scenes Section ----------------------
1198
  gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
1199
  gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
 
1256
  target_dir_output,
1257
  frame_filter,
1258
  show_cam,
 
1259
  filter_black_bg,
1260
  filter_white_bg,
1261
  apply_mask_checkbox,
1262
+ show_mesh,
1263
  ],
1264
  outputs=[
1265
  reconstruction_output,
 
1290
  frame_filter,
1291
  show_cam,
1292
  is_example,
1293
+ filter_black_bg,
1294
+ filter_white_bg,
1295
+ show_mesh,
1296
  ],
1297
  [reconstruction_output, log_output],
1298
  )
 
1306
  ],
1307
  [reconstruction_output, log_output],
1308
  )
1309
+ filter_black_bg.change(
1310
  update_visualization,
1311
  [
1312
  target_dir_output,
1313
  frame_filter,
1314
  show_cam,
1315
  is_example,
 
1316
  filter_black_bg,
1317
  filter_white_bg,
1318
  ],
1319
  [reconstruction_output, log_output],
1320
+ ).then(
1321
+ fn=update_all_views_on_filter_change,
1322
+ inputs=[
 
1323
  target_dir_output,
 
 
 
 
1324
  filter_black_bg,
1325
  filter_white_bg,
1326
+ processed_data_state,
1327
+ depth_view_selector,
1328
+ normal_view_selector,
1329
+ measure_view_selector,
1330
+ ],
1331
+ outputs=[
1332
+ processed_data_state,
1333
+ depth_map,
1334
+ normal_map,
1335
+ measure_image,
1336
+ measure_points_state,
1337
  ],
 
1338
  )
1339
  filter_white_bg.change(
1340
  update_visualization,
 
1343
  frame_filter,
1344
  show_cam,
1345
  is_example,
 
1346
  filter_black_bg,
1347
  filter_white_bg,
1348
  show_mesh,
1349
  ],
1350
  [reconstruction_output, log_output],
1351
+ ).then(
1352
+ fn=update_all_views_on_filter_change,
1353
+ inputs=[
1354
+ target_dir_output,
1355
+ filter_black_bg,
1356
+ filter_white_bg,
1357
+ processed_data_state,
1358
+ depth_view_selector,
1359
+ normal_view_selector,
1360
+ measure_view_selector,
1361
+ ],
1362
+ outputs=[
1363
+ processed_data_state,
1364
+ depth_map,
1365
+ normal_map,
1366
+ measure_image,
1367
+ measure_points_state,
1368
+ ],
1369
  )
1370
 
1371
  show_mesh.change(
 
1375
  frame_filter,
1376
  show_cam,
1377
  is_example,
 
1378
  filter_black_bg,
1379
  filter_white_bg,
1380
  show_mesh,
mapanything/utils/hf_utils/visual_util.py CHANGED
@@ -107,13 +107,13 @@ def image_mesh(
107
  *vertex_attrs (np.ndarray): vertex attributes in corresponding order with input image_attrs
108
  indices (np.ndarray, optional): indices of vertices in the original mesh
109
  """
110
- assert (len(image_attrs) > 0) or (mask is not None), (
111
- "At least one of image_attrs or mask should be provided"
112
- )
113
  height, width = next(image_attrs).shape[:2] if mask is None else mask.shape
114
- assert all(img.shape[:2] == (height, width) for img in image_attrs), (
115
- "All image_attrs should have the same shape"
116
- )
117
 
118
  row_faces = np.stack(
119
  [
@@ -151,14 +151,10 @@ def image_mesh(
151
 
152
  def predictions_to_glb(
153
  predictions,
154
- conf_thres=50.0,
155
  filter_by_frames="all",
156
  mask_black_bg=False,
157
  mask_white_bg=False,
158
  show_cam=True,
159
- mask_sky=False,
160
- target_dir=None,
161
- prediction_mode="Predicted Pointmap",
162
  mask_ambiguous=False,
163
  as_mesh=True,
164
  ) -> trimesh.Scene:
@@ -168,17 +164,12 @@ def predictions_to_glb(
168
  Args:
169
  predictions (dict): Dictionary containing model predictions with keys:
170
  - world_points: 3D point coordinates (S, H, W, 3)
171
- - world_points_conf: Confidence scores (S, H, W)
172
  - images: Input images (S, H, W, 3)
173
  - extrinsic: Camera extrinsic matrices (S, 3, 4)
174
- conf_thres (float): Percentage of low-confidence points to filter out (default: 50.0)
175
  filter_by_frames (str): Frame filter specification (default: "all")
176
  mask_black_bg (bool): Mask out black background pixels (default: False)
177
  mask_white_bg (bool): Mask out white background pixels (default: False)
178
  show_cam (bool): Include camera visualization (default: True)
179
- mask_sky (bool): Apply sky segmentation mask (default: False)
180
- target_dir (str): Output directory for intermediate files (default: None)
181
- prediction_mode (str): Prediction mode selector (default: "Predicted Pointmap")
182
  mask_ambiguous (bool): Apply final mask to filter ambiguous predictions (default: False)
183
  as_mesh (bool): Represent the data as a mesh instead of point cloud (default: False)
184
 
@@ -191,9 +182,6 @@ def predictions_to_glb(
191
  if not isinstance(predictions, dict):
192
  raise ValueError("predictions must be a dictionary")
193
 
194
- if conf_thres is None:
195
- conf_thres = 10.0
196
-
197
  print("Building GLB scene")
198
  selected_frame_idx = None
199
  if filter_by_frames != "all" and filter_by_frames != "All":
@@ -203,95 +191,23 @@ def predictions_to_glb(
203
  except (ValueError, IndexError):
204
  pass
205
 
206
- if "Pointmap" in prediction_mode:
207
- print("Using Pointmap Branch")
208
- if "world_points" in predictions:
209
- # import ipdb
210
-
211
- # ipdb.set_trace()
212
-
213
- pred_world_points = predictions[
214
- "world_points"
215
- ] # No batch dimension to remove
216
- pred_world_points_conf = predictions.get(
217
- "confidence", np.ones_like(pred_world_points[..., 0])
218
- )
219
- else:
220
- print(
221
- "Warning: world_points not found in predictions, falling back to depth-based points"
222
- )
223
- pred_world_points = predictions["world_points_from_depth"]
224
- pred_world_points_conf = predictions.get(
225
- "depth_conf", np.ones_like(pred_world_points[..., 0])
226
- )
227
- else:
228
- print("Using Depthmap and Camera Branch")
229
- pred_world_points = predictions["world_points_from_depth"]
230
- pred_world_points_conf = predictions.get(
231
- "depth_conf", np.ones_like(pred_world_points[..., 0])
232
  )
233
 
 
 
234
  # Get images from predictions
235
  images = predictions["images"]
236
  # Use extrinsic matrices instead of pred_extrinsic_list
237
  camera_matrices = predictions["extrinsic"]
238
 
239
- if mask_sky:
240
- if target_dir is not None:
241
- import onnxruntime
242
-
243
- skyseg_session = None
244
- target_dir_images = target_dir + "/images"
245
- image_list = sorted(os.listdir(target_dir_images))
246
- sky_mask_list = []
247
-
248
- # Get the shape of pred_world_points_conf to match
249
- S, H, W = (
250
- pred_world_points_conf.shape
251
- if hasattr(pred_world_points_conf, "shape")
252
- else (len(images), images.shape[1], images.shape[2])
253
- )
254
-
255
- # Download skyseg.onnx if it doesn't exist
256
- if not os.path.exists("skyseg.onnx"):
257
- print("Downloading skyseg.onnx...")
258
- download_file_from_url(
259
- "https://huggingface.co/JianyuanWang/skyseg/resolve/main/skyseg.onnx",
260
- "skyseg.onnx",
261
- )
262
-
263
- for i, image_name in enumerate(image_list):
264
- image_filepath = os.path.join(target_dir_images, image_name)
265
- mask_filepath = os.path.join(target_dir, "sky_masks", image_name)
266
-
267
- # Check if mask already exists
268
- if os.path.exists(mask_filepath):
269
- # Load existing mask
270
- sky_mask = cv2.imread(mask_filepath, cv2.IMREAD_GRAYSCALE)
271
- else:
272
- # Generate new mask
273
- if skyseg_session is None:
274
- skyseg_session = onnxruntime.InferenceSession("skyseg.onnx")
275
- sky_mask = segment_sky(
276
- image_filepath, skyseg_session, mask_filepath
277
- )
278
-
279
- # Resize mask to match H×W if needed
280
- if sky_mask.shape[0] != H or sky_mask.shape[1] != W:
281
- sky_mask = cv2.resize(sky_mask, (W, H))
282
-
283
- sky_mask_list.append(sky_mask)
284
-
285
- # Convert list to numpy array with shape S×H×W
286
- sky_mask_array = np.array(sky_mask_list)
287
-
288
- # Apply sky mask to confidence scores
289
- sky_mask_binary = (sky_mask_array > 0.1).astype(np.float32)
290
- pred_world_points_conf = pred_world_points_conf * sky_mask_binary
291
-
292
  if selected_frame_idx is not None:
293
  pred_world_points = pred_world_points[selected_frame_idx][None]
294
- pred_world_points_conf = pred_world_points_conf[selected_frame_idx][None]
295
  images = images[selected_frame_idx][None]
296
  camera_matrices = camera_matrices[selected_frame_idx][None]
297
 
@@ -303,36 +219,30 @@ def predictions_to_glb(
303
  colors_rgb = images
304
  colors_rgb = (colors_rgb.reshape(-1, 3) * 255).astype(np.uint8)
305
 
306
- conf = pred_world_points_conf.reshape(-1)
307
- # Convert percentage threshold to actual confidence value
308
- if conf_thres == 0.0:
309
- conf_threshold = 0.0
310
- else:
311
- conf_threshold = np.percentile(conf, conf_thres)
312
-
313
- conf_mask = (conf >= conf_threshold) & (conf > 1e-5)
314
  final_mask = predictions["final_mask"].reshape(-1)
315
 
316
  if mask_black_bg:
317
- black_bg_mask = colors_rgb.sum(axis=1) >= 16 / 255.0
318
- conf_mask = conf_mask & black_bg_mask
319
 
320
  if mask_white_bg:
321
  # Filter out white background pixels (RGB values close to white)
322
  # Consider pixels white if all RGB values are above 240
323
  white_bg_mask = (
324
- (colors_rgb[:, 0] > 240 / 255.0)
325
- & (colors_rgb[:, 1] > 240 / 255.0)
326
- & (colors_rgb[:, 2] > 240 / 255.0)
327
  )
328
- conf_mask = conf_mask & white_bg_mask
329
 
330
- # Use final_mask with conf_mask when mask_ambiguous is checked
331
  if mask_ambiguous:
332
- conf_mask = conf_mask & final_mask
333
 
334
- vertices_3d = vertices_3d[conf_mask].copy()
335
- colors_rgb = colors_rgb[conf_mask].copy()
336
 
337
  if vertices_3d is None or np.asarray(vertices_3d).size == 0:
338
  vertices_3d = np.array([[1, 0, 0]])
@@ -368,16 +278,13 @@ def predictions_to_glb(
368
  else: # Assume already in HWC format
369
  original_image_colors = images[0]
370
  original_image_colors *= 255
371
- # Create mask from confidence and other filters
372
- original_conf = pred_world_points_conf.reshape(H, W)
373
  original_final_mask = predictions["final_mask"][selected_frame_idx].reshape(
374
  H, W
375
  )
376
 
377
- # Apply thresholds to create mask
378
- mask = (original_conf >= conf_threshold) & (original_conf > 1e-5)
379
- if mask_ambiguous:
380
- mask = mask & original_final_mask
381
 
382
  # Additional background masks if needed
383
  if mask_black_bg:
@@ -407,29 +314,28 @@ def predictions_to_glb(
407
  original_points * np.array([1, -1, 1], dtype=np.float32),
408
  original_image_colors / 255.0,
409
  frame_normals * np.array([1, -1, 1], dtype=np.float32),
410
- mask=original_final_mask,
411
  tri=True,
412
  return_indices=False,
413
  )
414
 
415
  # Apply coordinate transformations to normals
416
  vertex_normals = vertex_normals * np.array([1, -1, 1], dtype=np.float32)
417
- # frame_normals = frame_normals * np.array([1, -1, 1], dtype=np.float32)
418
  else:
419
  # Create faces and vertices using image_mesh without normals
420
  faces, vertices, vertex_colors = image_mesh(
421
  original_points * np.array([1, -1, 1], dtype=np.float32),
422
  original_image_colors / 255.0,
423
- mask=original_final_mask,
424
  tri=True,
425
  return_indices=False,
426
  )
427
 
428
- vertices = vertices * np.array([1, -1, 1], dtype=np.float32)
429
 
430
  # Create trimesh object with optional normals
431
  mesh_data = trimesh.Trimesh(
432
- vertices=vertices,
433
  faces=faces,
434
  vertex_colors=(vertex_colors * 255).astype(np.uint8),
435
  vertex_normals=(vertex_normals if vertex_normals is not None else None),
@@ -446,7 +352,6 @@ def predictions_to_glb(
446
 
447
  # Get data for this frame
448
  frame_points = pred_world_points[frame_idx]
449
- frame_conf = pred_world_points_conf[frame_idx]
450
  frame_final_mask = predictions["final_mask"][frame_idx]
451
 
452
  # Get frame image
@@ -455,16 +360,27 @@ def predictions_to_glb(
455
  else: # Assume already in HWC format
456
  frame_image = images[frame_idx]
457
  frame_image *= 255
458
- # Create mask for this frame
459
- mask = (frame_conf >= conf_threshold) & (frame_conf > 1e-5)
460
- if mask_ambiguous:
461
- mask = mask | frame_final_mask
 
 
 
 
 
 
 
 
 
 
 
462
 
463
  # Create mesh for this frame
464
  faces, vertices, vertex_colors = image_mesh(
465
  frame_points * np.array([1, -1, 1], dtype=np.float32),
466
  frame_image / 255.0,
467
- mask=frame_final_mask,
468
  tri=True,
469
  return_indices=False,
470
  )
@@ -484,9 +400,6 @@ def predictions_to_glb(
484
 
485
  # Prepare 4x4 matrices for camera extrinsics
486
  num_cameras = len(camera_matrices)
487
- # extrinsics_matrices = np.zeros((num_cameras, 4, 4))
488
- # extrinsics_matrices[:, :3, :4] = camera_matrices
489
- # extrinsics_matrices[:, 3, 3] = 1
490
 
491
  if show_cam:
492
  # Add camera models to the scene
@@ -497,7 +410,7 @@ def predictions_to_glb(
497
  current_color = tuple(int(255 * x) for x in rgba_color[:3])
498
 
499
  integrate_camera_into_scene(
500
- scene_3d, camera_to_world, current_color, scene_scale
501
  )
502
 
503
  # Align scene to the observation of the first camera
 
107
  *vertex_attrs (np.ndarray): vertex attributes in corresponding order with input image_attrs
108
  indices (np.ndarray, optional): indices of vertices in the original mesh
109
  """
110
+ assert (len(image_attrs) > 0) or (
111
+ mask is not None
112
+ ), "At least one of image_attrs or mask should be provided"
113
  height, width = next(image_attrs).shape[:2] if mask is None else mask.shape
114
+ assert all(
115
+ img.shape[:2] == (height, width) for img in image_attrs
116
+ ), "All image_attrs should have the same shape"
117
 
118
  row_faces = np.stack(
119
  [
 
151
 
152
  def predictions_to_glb(
153
  predictions,
 
154
  filter_by_frames="all",
155
  mask_black_bg=False,
156
  mask_white_bg=False,
157
  show_cam=True,
 
 
 
158
  mask_ambiguous=False,
159
  as_mesh=True,
160
  ) -> trimesh.Scene:
 
164
  Args:
165
  predictions (dict): Dictionary containing model predictions with keys:
166
  - world_points: 3D point coordinates (S, H, W, 3)
 
167
  - images: Input images (S, H, W, 3)
168
  - extrinsic: Camera extrinsic matrices (S, 3, 4)
 
169
  filter_by_frames (str): Frame filter specification (default: "all")
170
  mask_black_bg (bool): Mask out black background pixels (default: False)
171
  mask_white_bg (bool): Mask out white background pixels (default: False)
172
  show_cam (bool): Include camera visualization (default: True)
 
 
 
173
  mask_ambiguous (bool): Apply final mask to filter ambiguous predictions (default: False)
174
  as_mesh (bool): Represent the data as a mesh instead of point cloud (default: False)
175
 
 
182
  if not isinstance(predictions, dict):
183
  raise ValueError("predictions must be a dictionary")
184
 
 
 
 
185
  print("Building GLB scene")
186
  selected_frame_idx = None
187
  if filter_by_frames != "all" and filter_by_frames != "All":
 
191
  except (ValueError, IndexError):
192
  pass
193
 
194
+ # Always use Pointmap Branch
195
+ print("Using Pointmap Branch")
196
+ if "world_points" not in predictions:
197
+ raise ValueError(
198
+ "world_points not found in predictions. Pointmap Branch requires 'world_points' key. "
199
+ "Depthmap and Camera branches have been removed."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  )
201
 
202
+ pred_world_points = predictions["world_points"]
203
+
204
  # Get images from predictions
205
  images = predictions["images"]
206
  # Use extrinsic matrices instead of pred_extrinsic_list
207
  camera_matrices = predictions["extrinsic"]
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  if selected_frame_idx is not None:
210
  pred_world_points = pred_world_points[selected_frame_idx][None]
 
211
  images = images[selected_frame_idx][None]
212
  camera_matrices = camera_matrices[selected_frame_idx][None]
213
 
 
219
  colors_rgb = images
220
  colors_rgb = (colors_rgb.reshape(-1, 3) * 255).astype(np.uint8)
221
 
222
+ # Create mask for filtering
223
+ mask = np.ones(len(vertices_3d), dtype=bool)
 
 
 
 
 
 
224
  final_mask = predictions["final_mask"].reshape(-1)
225
 
226
  if mask_black_bg:
227
+ black_bg_mask = colors_rgb.sum(axis=1) >= 16
228
+ mask = mask & black_bg_mask
229
 
230
  if mask_white_bg:
231
  # Filter out white background pixels (RGB values close to white)
232
  # Consider pixels white if all RGB values are above 240
233
  white_bg_mask = (
234
+ (colors_rgb[:, 0] > 240)
235
+ & (colors_rgb[:, 1] > 240)
236
+ & (colors_rgb[:, 2] > 240)
237
  )
238
+ mask = mask & ~white_bg_mask
239
 
240
+ # Use final_mask when mask_ambiguous is checked
241
  if mask_ambiguous:
242
+ mask = mask & final_mask
243
 
244
+ vertices_3d = vertices_3d[mask].copy()
245
+ colors_rgb = colors_rgb[mask].copy()
246
 
247
  if vertices_3d is None or np.asarray(vertices_3d).size == 0:
248
  vertices_3d = np.array([[1, 0, 0]])
 
278
  else: # Assume already in HWC format
279
  original_image_colors = images[0]
280
  original_image_colors *= 255
281
+ # Get original final mask
 
282
  original_final_mask = predictions["final_mask"][selected_frame_idx].reshape(
283
  H, W
284
  )
285
 
286
+ # Create mask based on final mask
287
+ mask = original_final_mask
 
 
288
 
289
  # Additional background masks if needed
290
  if mask_black_bg:
 
314
  original_points * np.array([1, -1, 1], dtype=np.float32),
315
  original_image_colors / 255.0,
316
  frame_normals * np.array([1, -1, 1], dtype=np.float32),
317
+ mask=mask,
318
  tri=True,
319
  return_indices=False,
320
  )
321
 
322
  # Apply coordinate transformations to normals
323
  vertex_normals = vertex_normals * np.array([1, -1, 1], dtype=np.float32)
 
324
  else:
325
  # Create faces and vertices using image_mesh without normals
326
  faces, vertices, vertex_colors = image_mesh(
327
  original_points * np.array([1, -1, 1], dtype=np.float32),
328
  original_image_colors / 255.0,
329
+ mask=mask,
330
  tri=True,
331
  return_indices=False,
332
  )
333
 
334
+ # vertices = vertices * np.array([1, -1, 1], dtype=np.float32)
335
 
336
  # Create trimesh object with optional normals
337
  mesh_data = trimesh.Trimesh(
338
+ vertices=vertices * np.array([1, -1, 1], dtype=np.float32),
339
  faces=faces,
340
  vertex_colors=(vertex_colors * 255).astype(np.uint8),
341
  vertex_normals=(vertex_normals if vertex_normals is not None else None),
 
352
 
353
  # Get data for this frame
354
  frame_points = pred_world_points[frame_idx]
 
355
  frame_final_mask = predictions["final_mask"][frame_idx]
356
 
357
  # Get frame image
 
360
  else: # Assume already in HWC format
361
  frame_image = images[frame_idx]
362
  frame_image *= 255
363
+ # Create mask for this frame using final_mask
364
+ mask = frame_final_mask
365
+
366
+ # Additional background masks if needed
367
+ if mask_black_bg:
368
+ black_bg_mask = frame_image.sum(axis=2) >= 16
369
+ mask = mask & black_bg_mask
370
+
371
+ if mask_white_bg:
372
+ white_bg_mask = ~(
373
+ (frame_image[:, :, 0] > 240)
374
+ & (frame_image[:, :, 1] > 240)
375
+ & (frame_image[:, :, 2] > 240)
376
+ )
377
+ mask = mask & white_bg_mask
378
 
379
  # Create mesh for this frame
380
  faces, vertices, vertex_colors = image_mesh(
381
  frame_points * np.array([1, -1, 1], dtype=np.float32),
382
  frame_image / 255.0,
383
+ mask=mask,
384
  tri=True,
385
  return_indices=False,
386
  )
 
400
 
401
  # Prepare 4x4 matrices for camera extrinsics
402
  num_cameras = len(camera_matrices)
 
 
 
403
 
404
  if show_cam:
405
  # Add camera models to the scene
 
410
  current_color = tuple(int(255 * x) for x in rgba_color[:3])
411
 
412
  integrate_camera_into_scene(
413
+ scene_3d, world_to_camera, current_color, scene_scale
414
  )
415
 
416
  # Align scene to the observation of the first camera