# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# conda activate hf3.10
import gc
import os
import shutil
import sys
import time
from datetime import datetime
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import cv2
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image
from pillow_heif import register_heif_opener
register_heif_opener()
sys.path.append("mapanything/")
from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals
from mapanything.utils.hf_utils.css_and_html import (
GRADIO_CSS,
MEASURE_INSTRUCTIONS_HTML,
get_acknowledgements_html,
get_description_html,
get_gradio_theme,
get_header_html,
)
from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
from mapanything.utils.hf_utils.visual_util import predictions_to_glb
from mapanything.utils.image import load_images, rgb
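
# Video file extensions recognized by the upload handlers below
# (consolidates the identical lists previously duplicated in three functions)
VIDEO_EXTENSIONS = [
    ".mp4",
    ".avi",
    ".mov",
    ".mkv",
    ".wmv",
    ".flv",
    ".webm",
    ".m4v",
    ".3gp",
]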
def get_logo_base64():
"""Convert WAI logo to base64 for embedding in HTML"""
import base64
logo_path = "examples/WAI-Logo/wai_logo.png"
try:
with open(logo_path, "rb") as img_file:
img_data = img_file.read()
base64_str = base64.b64encode(img_data).decode()
return f"data:image/png;base64,{base64_str}"
except FileNotFoundError:
return None
# MapAnything Configuration
high_level_config = {
"path": "configs/train.yaml",
"hf_model_name": "facebook/map-anything",
"model_str": "mapanything",
"config_overrides": [
"machine=aws",
"model=mapanything",
"model/task=images_only",
"model.encoder.uses_torch_hub=false",
],
"checkpoint_name": "model.safetensors",
"config_name": "config.json",
"trained_with_amp": True,
"trained_with_amp_dtype": "bf16",
"data_norm_type": "dinov2",
"patch_size": 14,
"resolution": 518,
}
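# Note: resolution 518 = 37 * 14, i.e. a multiple of the 14 px DINOv2 patch size,
# as the ViT encoder requires input dimensions divisible by the patch size.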
# Initialize model - this will be done on GPU when needed
model = None
# -------------------------------------------------------------------------
# 1) Core model inference
# -------------------------------------------------------------------------
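# On Hugging Face ZeroGPU Spaces, @spaces.GPU allocates a GPU for each call of the
# decorated function; `duration` caps how long (in seconds) the allocation may be held.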
@spaces.GPU(duration=120)
def run_model(
target_dir,
apply_mask=True,
mask_edges=True,
filter_black_bg=False,
filter_white_bg=False,
):
"""
Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
"""
global model
print(f"Processing images from {target_dir}")
# Device check
device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device)
# Initialize model if not already done
if model is None:
model = initialize_mapanything_model(high_level_config, device)
else:
model = model.to(device)
model.eval()
# Load images using MapAnything's load_images function
print("Loading images...")
image_folder_path = os.path.join(target_dir, "images")
views = load_images(image_folder_path)
print(f"Loaded {len(views)} images")
if len(views) == 0:
raise ValueError("No images found. Check your upload.")
# Run model inference
print("Running inference...")
    # apply_mask: whether to apply the non-ambiguous mask to the output (defaults to True).
    # mask_edges: whether to compute an edge mask based on normals and depth and apply it (defaults to True).
    outputs = model.infer(
        views,
        apply_mask=apply_mask,
        mask_edges=mask_edges,
        memory_efficient_inference=False,
    )
# Convert predictions to format expected by visualization
predictions = {}
# Initialize lists for the required keys
extrinsic_list = []
intrinsic_list = []
world_points_list = []
depth_maps_list = []
images_list = []
final_mask_list = []
# Loop through the outputs
for pred in outputs:
# Extract data from predictions
depthmap_torch = pred["depth_z"][0].squeeze(-1) # (H, W)
intrinsics_torch = pred["intrinsics"][0] # (3, 3)
camera_pose_torch = pred["camera_poses"][0] # (4, 4)
# Compute new pts3d using depth, intrinsics, and camera pose
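        # (standard pinhole back-projection: X_cam = depth * K^-1 @ [u, v, 1]^T,
        # then X_world = R @ X_cam + t, with [R | t] from the 4x4 camera-to-world pose)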
pts3d_computed, valid_mask = depthmap_to_world_frame(
depthmap_torch, intrinsics_torch, camera_pose_torch
)
# Convert to numpy arrays for visualization
# Check if mask key exists in pred, if not, fill with boolean trues in the size of depthmap_torch
if "mask" in pred:
mask = pred["mask"][0].squeeze(-1).cpu().numpy().astype(bool)
else:
# Fill with boolean trues in the size of depthmap_torch
mask = np.ones_like(depthmap_torch.cpu().numpy(), dtype=bool)
# Combine with valid depth mask
mask = mask & valid_mask.cpu().numpy()
image = pred["img_no_norm"][0].cpu().numpy()
# Append to lists
extrinsic_list.append(camera_pose_torch.cpu().numpy())
intrinsic_list.append(intrinsics_torch.cpu().numpy())
world_points_list.append(pts3d_computed.cpu().numpy())
depth_maps_list.append(depthmap_torch.cpu().numpy())
images_list.append(image) # Add image to list
final_mask_list.append(mask) # Add final_mask to list
# Convert lists to numpy arrays with required shapes
    # extrinsic: (S, 4, 4) - batch of camera-to-world pose matrices
predictions["extrinsic"] = np.stack(extrinsic_list, axis=0)
# intrinsic: (S, 3, 3) - batch of camera intrinsic matrices
predictions["intrinsic"] = np.stack(intrinsic_list, axis=0)
# world_points: (S, H, W, 3) - batch of 3D world points
predictions["world_points"] = np.stack(world_points_list, axis=0)
# depth: (S, H, W, 1) or (S, H, W) - batch of depth maps
depth_maps = np.stack(depth_maps_list, axis=0)
# Add channel dimension if needed to match (S, H, W, 1) format
if len(depth_maps.shape) == 3:
depth_maps = depth_maps[..., np.newaxis]
predictions["depth"] = depth_maps
# images: (S, H, W, 3) - batch of input images
predictions["images"] = np.stack(images_list, axis=0)
# final_mask: (S, H, W) - batch of final masks for filtering
predictions["final_mask"] = np.stack(final_mask_list, axis=0)
# Process data for visualization tabs (depth, normal, measure)
processed_data = process_predictions_for_visualization(
predictions, views, high_level_config, filter_black_bg, filter_white_bg
)
# Clean up
torch.cuda.empty_cache()
return predictions, processed_data
def update_view_selectors(processed_data):
"""Update view selector dropdowns based on available views"""
if processed_data is None or len(processed_data) == 0:
choices = ["View 1"]
else:
num_views = len(processed_data)
choices = [f"View {i + 1}" for i in range(num_views)]
return (
gr.Dropdown(choices=choices, value=choices[0]), # depth_view_selector
gr.Dropdown(choices=choices, value=choices[0]), # normal_view_selector
gr.Dropdown(choices=choices, value=choices[0]), # measure_view_selector
)
def get_view_data_by_index(processed_data, view_index):
"""Get view data by index, handling bounds"""
if processed_data is None or len(processed_data) == 0:
return None
view_keys = list(processed_data.keys())
if view_index < 0 or view_index >= len(view_keys):
view_index = 0
return processed_data[view_keys[view_index]]
def update_depth_view(processed_data, view_index):
"""Update depth view for a specific view index"""
view_data = get_view_data_by_index(processed_data, view_index)
if view_data is None or view_data["depth"] is None:
return None
return colorize_depth(view_data["depth"], mask=view_data.get("mask"))
def update_normal_view(processed_data, view_index):
"""Update normal view for a specific view index"""
view_data = get_view_data_by_index(processed_data, view_index)
if view_data is None or view_data["normal"] is None:
return None
return colorize_normal(view_data["normal"], mask=view_data.get("mask"))
def update_measure_view(processed_data, view_index):
"""Update measure view for a specific view index with mask overlay"""
view_data = get_view_data_by_index(processed_data, view_index)
if view_data is None:
return None, [] # image, measure_points
# Get the base image
image = view_data["image"].copy()
# Ensure image is in uint8 format
if image.dtype != np.uint8:
if image.max() <= 1.0:
image = (image * 255).astype(np.uint8)
else:
image = image.astype(np.uint8)
# Apply mask overlay if mask is available
if view_data["mask"] is not None:
mask = view_data["mask"]
        # Masked areas (False values) will be overlaid with a light red tint
        invalid_mask = ~mask  # Areas where mask is False
        if invalid_mask.any():
            # Light red overlay color (RGB: 255, 220, 220)
            overlay_color = np.array([255, 220, 220], dtype=np.uint8)
# Apply overlay with some transparency
alpha = 0.5 # Transparency level
for c in range(3): # RGB channels
image[:, :, c] = np.where(
invalid_mask,
(1 - alpha) * image[:, :, c] + alpha * overlay_color[c],
image[:, :, c],
).astype(np.uint8)
return image, []
def navigate_depth_view(processed_data, current_selector_value, direction):
"""Navigate depth view (direction: -1 for previous, +1 for next)"""
if processed_data is None or len(processed_data) == 0:
return "View 1", None
# Parse current view number
try:
current_view = int(current_selector_value.split()[1]) - 1
    except (AttributeError, IndexError, ValueError):
current_view = 0
num_views = len(processed_data)
new_view = (current_view + direction) % num_views
new_selector_value = f"View {new_view + 1}"
depth_vis = update_depth_view(processed_data, new_view)
return new_selector_value, depth_vis
def navigate_normal_view(processed_data, current_selector_value, direction):
"""Navigate normal view (direction: -1 for previous, +1 for next)"""
if processed_data is None or len(processed_data) == 0:
return "View 1", None
# Parse current view number
try:
current_view = int(current_selector_value.split()[1]) - 1
    except (AttributeError, IndexError, ValueError):
current_view = 0
num_views = len(processed_data)
new_view = (current_view + direction) % num_views
new_selector_value = f"View {new_view + 1}"
normal_vis = update_normal_view(processed_data, new_view)
return new_selector_value, normal_vis
def navigate_measure_view(processed_data, current_selector_value, direction):
"""Navigate measure view (direction: -1 for previous, +1 for next)"""
if processed_data is None or len(processed_data) == 0:
return "View 1", None, []
# Parse current view number
try:
current_view = int(current_selector_value.split()[1]) - 1
    except (AttributeError, IndexError, ValueError):
current_view = 0
num_views = len(processed_data)
new_view = (current_view + direction) % num_views
new_selector_value = f"View {new_view + 1}"
measure_image, measure_points = update_measure_view(processed_data, new_view)
return new_selector_value, measure_image, measure_points
def populate_visualization_tabs(processed_data):
"""Populate the depth, normal, and measure tabs with processed data"""
if processed_data is None or len(processed_data) == 0:
return None, None, None, []
    # Use the update functions so mask/background filtering is applied from the start
depth_vis = update_depth_view(processed_data, 0)
normal_vis = update_normal_view(processed_data, 0)
measure_img, _ = update_measure_view(processed_data, 0)
return depth_vis, normal_vis, measure_img, []
# -------------------------------------------------------------------------
# 2) Handle uploaded video/images --> produce target_dir + images
# -------------------------------------------------------------------------
def handle_uploads(unified_upload, s_time_interval=1.0):
"""
Create a new 'target_dir' + 'images' subfolder, and place user-uploaded
images or extracted frames from video into it. Return (target_dir, image_paths).
"""
start_time = time.time()
gc.collect()
torch.cuda.empty_cache()
# Create a unique folder name
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
target_dir = f"input_images_{timestamp}"
target_dir_images = os.path.join(target_dir, "images")
# Clean up if somehow that folder already exists
if os.path.exists(target_dir):
shutil.rmtree(target_dir)
os.makedirs(target_dir)
os.makedirs(target_dir_images)
image_paths = []
# --- Handle uploaded files (both images and videos) ---
if unified_upload is not None:
for file_data in unified_upload:
if isinstance(file_data, dict) and "name" in file_data:
file_path = file_data["name"]
else:
file_path = str(file_data)
file_ext = os.path.splitext(file_path)[1].lower()
            # Check if it's a video file
            if file_ext in VIDEO_EXTENSIONS:
# Handle as video
vs = cv2.VideoCapture(file_path)
fps = vs.get(cv2.CAP_PROP_FPS)
                frame_interval = max(1, int(fps * s_time_interval))  # frames between samples; guard against zero/invalid FPS
count = 0
video_frame_num = 0
while True:
gotit, frame = vs.read()
if not gotit:
break
count += 1
if count % frame_interval == 0:
# Use original filename as prefix for frames
base_name = os.path.splitext(os.path.basename(file_path))[0]
image_path = os.path.join(
target_dir_images, f"{base_name}_{video_frame_num:06}.png"
)
cv2.imwrite(image_path, frame)
image_paths.append(image_path)
video_frame_num += 1
vs.release()
print(
f"Extracted {video_frame_num} frames from video: {os.path.basename(file_path)}"
)
else:
# Handle as image
# Check if the file is a HEIC image
if file_ext in [".heic", ".heif"]:
# Convert HEIC to JPEG for better gallery compatibility
try:
with Image.open(file_path) as img:
# Convert to RGB if necessary (HEIC can have different color modes)
if img.mode not in ("RGB", "L"):
img = img.convert("RGB")
# Create JPEG filename
base_name = os.path.splitext(os.path.basename(file_path))[0]
dst_path = os.path.join(
target_dir_images, f"{base_name}.jpg"
)
# Save as JPEG with high quality
img.save(dst_path, "JPEG", quality=95)
image_paths.append(dst_path)
print(
f"Converted HEIC to JPEG: {os.path.basename(file_path)} -> {os.path.basename(dst_path)}"
)
except Exception as e:
print(f"Error converting HEIC file {file_path}: {e}")
# Fall back to copying as is
dst_path = os.path.join(
target_dir_images, os.path.basename(file_path)
)
shutil.copy(file_path, dst_path)
image_paths.append(dst_path)
else:
# Regular image files - copy as is
dst_path = os.path.join(
target_dir_images, os.path.basename(file_path)
)
shutil.copy(file_path, dst_path)
image_paths.append(dst_path)
# Sort final images for gallery
image_paths = sorted(image_paths)
end_time = time.time()
print(
f"Files processed to {target_dir_images}; took {end_time - start_time:.3f} seconds"
)
return target_dir, image_paths
# -------------------------------------------------------------------------
# 3) Update gallery on upload
# -------------------------------------------------------------------------
def update_gallery_on_upload(unified_upload, s_time_interval=1.0):
    """
    Whenever the user uploads or changes files, immediately handle them
    and show them in the gallery. Returns (reconstruction, target_dir,
    image_paths, log_message); if nothing is uploaded, all values are None.
    """
    if not unified_upload:
        return None, None, None, None
    target_dir, image_paths = handle_uploads(unified_upload, s_time_interval)
    return (
        None,
        target_dir,
        image_paths,
        "Upload complete. Click 'Reconstruct' to begin 3D processing.",
    )
# -------------------------------------------------------------------------
# 4) Reconstruction: uses the target_dir plus any viz parameters
# -------------------------------------------------------------------------
@spaces.GPU(duration=120)
def gradio_demo(
target_dir,
frame_filter="All",
show_cam=True,
filter_black_bg=False,
filter_white_bg=False,
apply_mask=True,
show_mesh=True,
):
"""
Perform reconstruction using the already-created target_dir/images.
"""
    if not os.path.isdir(target_dir) or target_dir == "None":
        # Return a value for every output slot so Gradio does not raise on mismatch
        return (
            None,
            "No valid target directory found. Please upload first.",
            gr.update(),
            None,
            None,
            None,
            None,
            "",
            gr.update(),
            gr.update(),
            gr.update(),
        )
start_time = time.time()
gc.collect()
torch.cuda.empty_cache()
# Prepare frame_filter dropdown
target_dir_images = os.path.join(target_dir, "images")
all_files = (
sorted(os.listdir(target_dir_images))
if os.path.isdir(target_dir_images)
else []
)
all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
frame_filter_choices = ["All"] + all_files
print("Running MapAnything model...")
with torch.no_grad():
        predictions, processed_data = run_model(
            target_dir,
            apply_mask,
            filter_black_bg=filter_black_bg,
            filter_white_bg=filter_white_bg,
        )
# Save predictions
prediction_save_path = os.path.join(target_dir, "predictions.npz")
np.savez(prediction_save_path, **predictions)
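    # update_visualization() later reloads this .npz to re-export the GLB with new
    # display options, without re-running model inference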
# Handle None frame_filter
if frame_filter is None:
frame_filter = "All"
# Build a GLB file name
glbfile = os.path.join(
target_dir,
f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
)
# Convert predictions to GLB
glbscene = predictions_to_glb(
predictions,
filter_by_frames=frame_filter,
show_cam=show_cam,
mask_black_bg=filter_black_bg,
mask_white_bg=filter_white_bg,
as_mesh=show_mesh, # Use the show_mesh parameter
)
glbscene.export(file_obj=glbfile)
# Cleanup
del predictions
gc.collect()
torch.cuda.empty_cache()
end_time = time.time()
print(f"Total time: {end_time - start_time:.2f} seconds")
log_msg = (
f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."
)
# Populate visualization tabs with processed data
depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(
processed_data
)
# Update view selectors based on available views
depth_selector, normal_selector, measure_selector = update_view_selectors(
processed_data
)
return (
glbfile,
log_msg,
gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True),
processed_data,
depth_vis,
normal_vis,
measure_img,
"", # measure_text (empty initially)
depth_selector,
normal_selector,
measure_selector,
)
# -------------------------------------------------------------------------
# 5) Helper functions for UI resets + re-visualization
# -------------------------------------------------------------------------
def colorize_depth(depth_map, mask=None):
"""Convert depth map to colorized visualization with optional mask"""
if depth_map is None:
return None
# Normalize depth to 0-1 range
depth_normalized = depth_map.copy()
valid_mask = depth_normalized > 0
# Apply additional mask if provided (for background filtering)
if mask is not None:
valid_mask = valid_mask & mask
    if valid_mask.sum() > 0:
        valid_depths = depth_normalized[valid_mask]
        p5 = np.percentile(valid_depths, 5)
        p95 = np.percentile(valid_depths, 95)
        denom = max(p95 - p5, 1e-8)  # guard against near-constant depth maps
        depth_normalized[valid_mask] = np.clip(
            (depth_normalized[valid_mask] - p5) / denom, 0.0, 1.0
        )
# Apply colormap
import matplotlib.pyplot as plt
colormap = plt.cm.turbo_r
colored = colormap(depth_normalized)
colored = (colored[:, :, :3] * 255).astype(np.uint8)
# Set invalid pixels to white
colored[~valid_mask] = [255, 255, 255]
return colored
def colorize_normal(normal_map, mask=None):
"""Convert normal map to colorized visualization with optional mask"""
if normal_map is None:
return None
# Create a copy for modification
normal_vis = normal_map.copy()
# Apply mask if provided (set masked areas to [0, 0, 0] which becomes grey after normalization)
if mask is not None:
invalid_mask = ~mask
normal_vis[invalid_mask] = [0, 0, 0] # Set invalid areas to zero
# Normalize normals to [0, 1] range for visualization
normal_vis = (normal_vis + 1.0) / 2.0
normal_vis = (normal_vis * 255).astype(np.uint8)
return normal_vis
def process_predictions_for_visualization(
predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False
):
"""Extract depth, normal, and 3D points from predictions for visualization"""
processed_data = {}
# Process each view
for view_idx, view in enumerate(views):
# Get image
image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
# Get predicted points
pred_pts3d = predictions["world_points"][view_idx]
# Initialize data for this view
view_data = {
"image": image[0],
"points3d": pred_pts3d,
"depth": None,
"normal": None,
"mask": None,
}
# Start with the final mask from predictions
mask = predictions["final_mask"][view_idx].copy()
# Apply black background filtering if enabled
if filter_black_bg:
# Get the image colors (ensure they're in 0-255 range)
view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
# Filter out black background pixels (sum of RGB < 16)
black_bg_mask = view_colors.sum(axis=2) >= 16
mask = mask & black_bg_mask
# Apply white background filtering if enabled
if filter_white_bg:
# Get the image colors (ensure they're in 0-255 range)
view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
# Filter out white background pixels (all RGB > 240)
white_bg_mask = ~(
(view_colors[:, :, 0] > 240)
& (view_colors[:, :, 1] > 240)
& (view_colors[:, :, 2] > 240)
)
mask = mask & white_bg_mask
view_data["mask"] = mask
view_data["depth"] = predictions["depth"][view_idx].squeeze()
normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
view_data["normal"] = normals
processed_data[view_idx] = view_data
return processed_data
def reset_measure(processed_data):
"""Reset measure points"""
if processed_data is None or len(processed_data) == 0:
return None, [], ""
# Return the first view image
first_view = list(processed_data.values())[0]
return first_view["image"], [], ""
def measure(
processed_data, measure_points, current_view_selector, event: gr.SelectData
):
"""Handle measurement on images"""
try:
print(f"Measure function called with selector: {current_view_selector}")
if processed_data is None or len(processed_data) == 0:
return None, [], "No data available"
# Use the currently selected view instead of always using the first view
try:
current_view_index = int(current_view_selector.split()[1]) - 1
        except (AttributeError, IndexError, ValueError):
current_view_index = 0
print(f"Using view index: {current_view_index}")
# Get view data safely
if current_view_index < 0 or current_view_index >= len(processed_data):
current_view_index = 0
view_keys = list(processed_data.keys())
current_view = processed_data[view_keys[current_view_index]]
if current_view is None:
return None, [], "No view data available"
point2d = event.index[0], event.index[1]
print(f"Clicked point: {point2d}")
# Check if the clicked point is in a masked area (prevent interaction)
if (
current_view["mask"] is not None
and 0 <= point2d[1] < current_view["mask"].shape[0]
and 0 <= point2d[0] < current_view["mask"].shape[1]
):
# Check if the point is in a masked (invalid) area
if not current_view["mask"][point2d[1], point2d[0]]:
print(f"Clicked point {point2d} is in masked area, ignoring click")
# Always return image with mask overlay
masked_image, _ = update_measure_view(
processed_data, current_view_index
)
return (
masked_image,
measure_points,
'<span style="color: red; font-weight: bold;">Cannot measure on masked areas (shown in grey)</span>',
)
measure_points.append(point2d)
# Get image with mask overlay and ensure it's valid
image, _ = update_measure_view(processed_data, current_view_index)
if image is None:
return None, [], "No image available"
image = image.copy()
points3d = current_view["points3d"]
# Ensure image is in uint8 format for proper cv2 operations
try:
if image.dtype != np.uint8:
if image.max() <= 1.0:
# Image is in [0, 1] range, convert to [0, 255]
image = (image * 255).astype(np.uint8)
else:
# Image is already in [0, 255] range
image = image.astype(np.uint8)
except Exception as e:
print(f"Image conversion error: {e}")
return None, [], f"Image conversion error: {e}"
# Draw circles for points
try:
for p in measure_points:
if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
image = cv2.circle(
image, p, radius=5, color=(255, 0, 0), thickness=2
)
except Exception as e:
print(f"Drawing error: {e}")
return None, [], f"Drawing error: {e}"
depth_text = ""
try:
for i, p in enumerate(measure_points):
if (
current_view["depth"] is not None
and 0 <= p[1] < current_view["depth"].shape[0]
and 0 <= p[0] < current_view["depth"].shape[1]
):
d = current_view["depth"][p[1], p[0]]
depth_text += f"- **P{i + 1} depth: {d:.2f}m.**\n"
else:
# Use Z coordinate of 3D points if depth not available
if (
points3d is not None
and 0 <= p[1] < points3d.shape[0]
and 0 <= p[0] < points3d.shape[1]
):
z = points3d[p[1], p[0], 2]
depth_text += f"- **P{i + 1} Z-coord: {z:.2f}m.**\n"
except Exception as e:
print(f"Depth text error: {e}")
depth_text = f"Error computing depth: {e}\n"
if len(measure_points) == 2:
try:
point1, point2 = measure_points
# Draw line
if (
0 <= point1[0] < image.shape[1]
and 0 <= point1[1] < image.shape[0]
and 0 <= point2[0] < image.shape[1]
and 0 <= point2[1] < image.shape[0]
):
image = cv2.line(
image, point1, point2, color=(255, 0, 0), thickness=2
)
# Compute 3D distance
distance_text = "- **Distance: Unable to compute**"
if (
points3d is not None
and 0 <= point1[1] < points3d.shape[0]
and 0 <= point1[0] < points3d.shape[1]
and 0 <= point2[1] < points3d.shape[0]
and 0 <= point2[0] < points3d.shape[1]
):
try:
p1_3d = points3d[point1[1], point1[0]]
p2_3d = points3d[point2[1], point2[0]]
distance = np.linalg.norm(p1_3d - p2_3d)
distance_text = f"- **Distance: {distance:.2f}m**"
except Exception as e:
print(f"Distance computation error: {e}")
distance_text = f"- **Distance computation error: {e}**"
measure_points = []
text = depth_text + distance_text
print(f"Measurement complete: {text}")
return [image, measure_points, text]
except Exception as e:
print(f"Final measurement error: {e}")
return None, [], f"Measurement error: {e}"
else:
print(f"Single point measurement: {depth_text}")
return [image, measure_points, depth_text]
except Exception as e:
print(f"Overall measure function error: {e}")
return None, [], f"Measure function error: {e}"
def clear_fields():
"""
    Clears the 3D viewer before a new reconstruction runs.
"""
return None
def update_log():
"""
Display a quick log message while waiting.
"""
return "Loading and Reconstructing..."
def update_visualization(
target_dir,
frame_filter,
show_cam,
is_example,
filter_black_bg=False,
filter_white_bg=False,
show_mesh=True,
):
"""
Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
and return it for the 3D viewer. If is_example == "True", skip.
"""
# If it's an example click, skip as requested
if is_example == "True":
return (
gr.update(),
"No reconstruction available. Please click the Reconstruct button first.",
)
if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
return (
gr.update(),
"No reconstruction available. Please click the Reconstruct button first.",
)
predictions_path = os.path.join(target_dir, "predictions.npz")
if not os.path.exists(predictions_path):
return (
gr.update(),
f"No reconstruction available at {predictions_path}. Please run 'Reconstruct' first.",
)
loaded = np.load(predictions_path, allow_pickle=True)
predictions = {key: loaded[key] for key in loaded.keys()}
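    # The GLB filename encodes every display option, so an identical prior export
    # is reused from disk (see the os.path.exists check below) instead of rebuilt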
glbfile = os.path.join(
target_dir,
f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
)
if not os.path.exists(glbfile):
glbscene = predictions_to_glb(
predictions,
filter_by_frames=frame_filter,
show_cam=show_cam,
mask_black_bg=filter_black_bg,
mask_white_bg=filter_white_bg,
as_mesh=show_mesh,
)
glbscene.export(file_obj=glbfile)
return (
glbfile,
"Visualization updated.",
)
def update_all_views_on_filter_change(
target_dir,
filter_black_bg,
filter_white_bg,
processed_data,
depth_view_selector,
normal_view_selector,
measure_view_selector,
):
"""
Update all individual view tabs when background filtering checkboxes change.
This regenerates the processed data with new filtering and updates all views.
"""
# Check if we have a valid target directory and predictions
if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
return processed_data, None, None, None, []
predictions_path = os.path.join(target_dir, "predictions.npz")
if not os.path.exists(predictions_path):
return processed_data, None, None, None, []
try:
# Load the original predictions and views
loaded = np.load(predictions_path, allow_pickle=True)
predictions = {key: loaded[key] for key in loaded.keys()}
# Load images using MapAnything's load_images function
image_folder_path = os.path.join(target_dir, "images")
views = load_images(image_folder_path)
# Regenerate processed data with new filtering settings
new_processed_data = process_predictions_for_visualization(
predictions, views, high_level_config, filter_black_bg, filter_white_bg
)
# Get current view indices
try:
depth_view_idx = (
int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
)
        except (AttributeError, IndexError, ValueError):
depth_view_idx = 0
try:
normal_view_idx = (
int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
)
        except (AttributeError, IndexError, ValueError):
normal_view_idx = 0
try:
measure_view_idx = (
int(measure_view_selector.split()[1]) - 1
if measure_view_selector
else 0
)
        except (AttributeError, IndexError, ValueError):
measure_view_idx = 0
# Update all views with new filtered data
depth_vis = update_depth_view(new_processed_data, depth_view_idx)
normal_vis = update_normal_view(new_processed_data, normal_view_idx)
measure_img, _ = update_measure_view(new_processed_data, measure_view_idx)
return new_processed_data, depth_vis, normal_vis, measure_img, []
except Exception as e:
print(f"Error updating views on filter change: {e}")
return processed_data, None, None, None, []
# -------------------------------------------------------------------------
# Example scene functions
# -------------------------------------------------------------------------
def get_scene_info(examples_dir):
"""Get information about scenes in the examples directory"""
import glob
scenes = []
if not os.path.exists(examples_dir):
return scenes
for scene_folder in sorted(os.listdir(examples_dir)):
scene_path = os.path.join(examples_dir, scene_folder)
if os.path.isdir(scene_path):
# Find all image files in the scene folder
image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff", "*.tif"]
image_files = []
for ext in image_extensions:
image_files.extend(glob.glob(os.path.join(scene_path, ext)))
image_files.extend(glob.glob(os.path.join(scene_path, ext.upper())))
if image_files:
                # De-duplicate (both glob cases can match on case-insensitive
                # filesystems), sort, and use the first image as the thumbnail
                image_files = sorted(set(image_files))
first_image = image_files[0]
num_images = len(image_files)
scenes.append(
{
"name": scene_folder,
"path": scene_path,
"thumbnail": first_image,
"num_images": num_images,
"image_files": image_files,
}
)
return scenes
def load_example_scene(scene_name, examples_dir="examples"):
"""Load a scene from examples directory"""
scenes = get_scene_info(examples_dir)
# Find the selected scene
selected_scene = None
for scene in scenes:
if scene["name"] == scene_name:
selected_scene = scene
break
if selected_scene is None:
return None, None, None, "Scene not found"
    # handle_uploads accepts plain file paths, so pass the scene's image paths directly
    file_objects = list(selected_scene["image_files"])
# Create target directory and copy images using the unified upload system
target_dir, image_paths = handle_uploads(file_objects, 1.0)
return (
None, # Clear reconstruction output
target_dir, # Set target directory
image_paths, # Set gallery
f"Loaded scene '{scene_name}' with {selected_scene['num_images']} images. Click 'Reconstruct' to begin 3D processing.",
)
# -------------------------------------------------------------------------
# 6) Build Gradio UI
# -------------------------------------------------------------------------
theme = get_gradio_theme()
with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
# State variables for the tabbed interface
is_example = gr.Textbox(label="is_example", visible=False, value="None")
num_images = gr.Textbox(label="num_images", visible=False, value="None")
processed_data_state = gr.State(value=None)
measure_points_state = gr.State(value=[])
current_view_index = gr.State(value=0) # Track current view index for navigation
gr.HTML(get_header_html(get_logo_base64()))
gr.HTML(get_description_html())
target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
with gr.Row():
with gr.Column(scale=2):
# Unified upload component for both videos and images
unified_upload = gr.File(
file_count="multiple",
label="Upload Video or Images",
interactive=True,
file_types=["image", "video"],
)
with gr.Row():
s_time_interval = gr.Slider(
minimum=0.1,
maximum=5.0,
value=1.0,
step=0.1,
label="Video sample time interval (take a sample every x sec.)",
interactive=True,
visible=True,
scale=3,
)
resample_btn = gr.Button(
"Resample Video",
visible=False,
variant="secondary",
scale=1,
)
image_gallery = gr.Gallery(
label="Preview",
columns=4,
height="300px",
show_download_button=True,
object_fit="contain",
preview=True,
)
clear_uploads_btn = gr.ClearButton(
[unified_upload, image_gallery],
value="Clear Uploads",
variant="secondary",
size="sm",
)
with gr.Column(scale=4):
with gr.Column():
gr.Markdown(
"**Metric 3D Reconstruction (Point Cloud and Camera Poses)**"
)
log_output = gr.Markdown(
"Please upload a video or images, then click Reconstruct.",
elem_classes=["custom-log"],
)
# Add tabbed interface similar to MoGe
with gr.Tabs():
with gr.Tab("3D View"):
reconstruction_output = gr.Model3D(
height=520,
zoom_speed=0.5,
pan_speed=0.5,
clear_color=[0.0, 0.0, 0.0, 0.0],
key="persistent_3d_viewer",
elem_id="reconstruction_3d_viewer",
)
with gr.Tab("Depth"):
with gr.Row(elem_classes=["navigation-row"]):
prev_depth_btn = gr.Button("◀ Previous", size="sm", scale=1)
depth_view_selector = gr.Dropdown(
choices=["View 1"],
value="View 1",
label="Select View",
scale=2,
interactive=True,
allow_custom_value=True,
)
next_depth_btn = gr.Button("Next ▶", size="sm", scale=1)
depth_map = gr.Image(
type="numpy",
label="Colorized Depth Map",
format="png",
interactive=False,
)
with gr.Tab("Normal"):
with gr.Row(elem_classes=["navigation-row"]):
prev_normal_btn = gr.Button(
"◀ Previous", size="sm", scale=1
)
normal_view_selector = gr.Dropdown(
choices=["View 1"],
value="View 1",
label="Select View",
scale=2,
interactive=True,
allow_custom_value=True,
)
next_normal_btn = gr.Button("Next ▶", size="sm", scale=1)
normal_map = gr.Image(
type="numpy",
label="Normal Map",
format="png",
interactive=False,
)
with gr.Tab("Measure"):
gr.Markdown(MEASURE_INSTRUCTIONS_HTML)
with gr.Row(elem_classes=["navigation-row"]):
prev_measure_btn = gr.Button(
"◀ Previous", size="sm", scale=1
)
measure_view_selector = gr.Dropdown(
choices=["View 1"],
value="View 1",
label="Select View",
scale=2,
interactive=True,
allow_custom_value=True,
)
next_measure_btn = gr.Button("Next ▶", size="sm", scale=1)
measure_image = gr.Image(
type="numpy",
show_label=False,
format="webp",
interactive=False,
sources=[],
)
gr.Markdown(
"**Note:** Light-grey areas indicate regions with no depth information where measurements cannot be taken."
)
measure_text = gr.Markdown("")
with gr.Row():
submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
clear_btn = gr.ClearButton(
[
unified_upload,
reconstruction_output,
log_output,
target_dir_output,
image_gallery,
],
scale=1,
)
with gr.Row():
frame_filter = gr.Dropdown(
choices=["All"], value="All", label="Show Points from Frame"
)
with gr.Column():
gr.Markdown("### Pointcloud Options: (live updates)")
show_cam = gr.Checkbox(label="Show Camera", value=True)
show_mesh = gr.Checkbox(label="Show Mesh", value=True)
filter_black_bg = gr.Checkbox(
label="Filter Black Background", value=False
)
filter_white_bg = gr.Checkbox(
label="Filter White Background", value=False
)
gr.Markdown("### Reconstruction Options: (updated on next run)")
apply_mask_checkbox = gr.Checkbox(
label="Apply mask for predicted ambiguous depth classes & edges",
value=True,
)
# ---------------------- Example Scenes Section ----------------------
gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
# Get scene information
scenes = get_scene_info("examples")
# Create thumbnail grid (4 columns, N rows)
if scenes:
for i in range(0, len(scenes), 4): # Process 4 scenes per row
with gr.Row():
for j in range(4):
scene_idx = i + j
if scene_idx < len(scenes):
scene = scenes[scene_idx]
with gr.Column(scale=1, elem_classes=["clickable-thumbnail"]):
# Clickable thumbnail
scene_img = gr.Image(
value=scene["thumbnail"],
height=150,
interactive=False,
show_label=False,
elem_id=f"scene_thumb_{scene['name']}",
sources=[],
)
# Scene name and image count as text below thumbnail
gr.Markdown(
f"**{scene['name']}** \n {scene['num_images']} images",
elem_classes=["scene-info"],
)
# Connect thumbnail click to load scene
scene_img.select(
fn=lambda name=scene["name"]: load_example_scene(name),
outputs=[
reconstruction_output,
target_dir_output,
image_gallery,
log_output,
],
)
else:
# Empty column to maintain grid structure
with gr.Column(scale=1):
pass
# -------------------------------------------------------------------------
# "Reconstruct" button logic:
# - Clear fields
# - Update log
# - gradio_demo(...) with the existing target_dir
# - Then set is_example = "False"
# -------------------------------------------------------------------------
submit_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then(
fn=update_log, inputs=[], outputs=[log_output]
).then(
fn=gradio_demo,
inputs=[
target_dir_output,
frame_filter,
show_cam,
filter_black_bg,
filter_white_bg,
apply_mask_checkbox,
show_mesh,
],
outputs=[
reconstruction_output,
log_output,
frame_filter,
processed_data_state,
depth_map,
normal_map,
measure_image,
measure_text,
depth_view_selector,
normal_view_selector,
measure_view_selector,
],
).then(
fn=lambda: "False",
inputs=[],
outputs=[is_example], # set is_example to "False"
)
# -------------------------------------------------------------------------
# Real-time Visualization Updates
# -------------------------------------------------------------------------
frame_filter.change(
update_visualization,
[
target_dir_output,
frame_filter,
show_cam,
is_example,
filter_black_bg,
filter_white_bg,
show_mesh,
],
[reconstruction_output, log_output],
)
    show_cam.change(
        update_visualization,
        [
            target_dir_output,
            frame_filter,
            show_cam,
            is_example,
            filter_black_bg,
            filter_white_bg,
            show_mesh,
        ],
        [reconstruction_output, log_output],
    )
    filter_black_bg.change(
        update_visualization,
        [
            target_dir_output,
            frame_filter,
            show_cam,
            is_example,
            filter_black_bg,
            filter_white_bg,
            show_mesh,
        ],
        [reconstruction_output, log_output],
).then(
fn=update_all_views_on_filter_change,
inputs=[
target_dir_output,
filter_black_bg,
filter_white_bg,
processed_data_state,
depth_view_selector,
normal_view_selector,
measure_view_selector,
],
outputs=[
processed_data_state,
depth_map,
normal_map,
measure_image,
measure_points_state,
],
)
filter_white_bg.change(
update_visualization,
[
target_dir_output,
frame_filter,
show_cam,
is_example,
filter_black_bg,
filter_white_bg,
show_mesh,
],
[reconstruction_output, log_output],
).then(
fn=update_all_views_on_filter_change,
inputs=[
target_dir_output,
filter_black_bg,
filter_white_bg,
processed_data_state,
depth_view_selector,
normal_view_selector,
measure_view_selector,
],
outputs=[
processed_data_state,
depth_map,
normal_map,
measure_image,
measure_points_state,
],
)
show_mesh.change(
update_visualization,
[
target_dir_output,
frame_filter,
show_cam,
is_example,
filter_black_bg,
filter_white_bg,
show_mesh,
],
[reconstruction_output, log_output],
)
# -------------------------------------------------------------------------
# Auto-update gallery whenever user uploads or changes their files
# -------------------------------------------------------------------------
def update_gallery_on_unified_upload(files, interval):
if not files:
return None, None, None
target_dir, image_paths = handle_uploads(files, interval)
return (
target_dir,
image_paths,
"Upload complete. Click 'Reconstruct' to begin 3D processing.",
)
def show_resample_button(files):
"""Show the resample button only if there are uploaded files containing videos"""
if not files:
return gr.update(visible=False)
        # Check if any uploaded files are videos
has_video = False
for file_data in files:
if isinstance(file_data, dict) and "name" in file_data:
file_path = file_data["name"]
else:
file_path = str(file_data)
file_ext = os.path.splitext(file_path)[1].lower()
            if file_ext in VIDEO_EXTENSIONS:
has_video = True
break
return gr.update(visible=has_video)
def hide_resample_button():
"""Hide the resample button after use"""
return gr.update(visible=False)
def resample_video_with_new_interval(files, new_interval, current_target_dir):
"""Resample video with new slider value"""
if not files:
return (
current_target_dir,
None,
"No files to resample.",
gr.update(visible=False),
)
        # Check if we have videos to resample
has_video = any(
os.path.splitext(
str(file_data["name"] if isinstance(file_data, dict) else file_data)
)[1].lower()
            in VIDEO_EXTENSIONS
for file_data in files
)
if not has_video:
return (
current_target_dir,
None,
"No videos found to resample.",
gr.update(visible=False),
)
# Clean up old target directory if it exists
if (
current_target_dir
and current_target_dir != "None"
and os.path.exists(current_target_dir)
):
shutil.rmtree(current_target_dir)
# Process files with new interval
target_dir, image_paths = handle_uploads(files, new_interval)
return (
target_dir,
image_paths,
f"Video resampled with {new_interval}s interval. Click 'Reconstruct' to begin 3D processing.",
gr.update(visible=False),
)
unified_upload.change(
fn=update_gallery_on_unified_upload,
inputs=[unified_upload, s_time_interval],
outputs=[target_dir_output, image_gallery, log_output],
).then(
fn=show_resample_button,
inputs=[unified_upload],
outputs=[resample_btn],
)
# Show resample button when slider changes (only if files are uploaded)
s_time_interval.change(
fn=show_resample_button,
inputs=[unified_upload],
outputs=[resample_btn],
)
# Handle resample button click
resample_btn.click(
fn=resample_video_with_new_interval,
inputs=[unified_upload, s_time_interval, target_dir_output],
outputs=[target_dir_output, image_gallery, log_output, resample_btn],
)
# -------------------------------------------------------------------------
# Measure tab functionality
# -------------------------------------------------------------------------
measure_image.select(
fn=measure,
inputs=[processed_data_state, measure_points_state, measure_view_selector],
outputs=[measure_image, measure_points_state, measure_text],
)
# -------------------------------------------------------------------------
# Navigation functionality for Depth, Normal, and Measure tabs
# -------------------------------------------------------------------------
# Depth tab navigation
prev_depth_btn.click(
fn=lambda processed_data, current_selector: navigate_depth_view(
processed_data, current_selector, -1
),
inputs=[processed_data_state, depth_view_selector],
outputs=[depth_view_selector, depth_map],
)
next_depth_btn.click(
fn=lambda processed_data, current_selector: navigate_depth_view(
processed_data, current_selector, 1
),
inputs=[processed_data_state, depth_view_selector],
outputs=[depth_view_selector, depth_map],
)
depth_view_selector.change(
fn=lambda processed_data, selector_value: (
update_depth_view(
processed_data,
int(selector_value.split()[1]) - 1,
)
if selector_value
else None
),
inputs=[processed_data_state, depth_view_selector],
outputs=[depth_map],
)
# Normal tab navigation
prev_normal_btn.click(
fn=lambda processed_data, current_selector: navigate_normal_view(
processed_data, current_selector, -1
),
inputs=[processed_data_state, normal_view_selector],
outputs=[normal_view_selector, normal_map],
)
next_normal_btn.click(
fn=lambda processed_data, current_selector: navigate_normal_view(
processed_data, current_selector, 1
),
inputs=[processed_data_state, normal_view_selector],
outputs=[normal_view_selector, normal_map],
)
normal_view_selector.change(
fn=lambda processed_data, selector_value: (
update_normal_view(
processed_data,
int(selector_value.split()[1]) - 1,
)
if selector_value
else None
),
inputs=[processed_data_state, normal_view_selector],
outputs=[normal_map],
)
# Measure tab navigation
prev_measure_btn.click(
fn=lambda processed_data, current_selector: navigate_measure_view(
processed_data, current_selector, -1
),
inputs=[processed_data_state, measure_view_selector],
outputs=[measure_view_selector, measure_image, measure_points_state],
)
next_measure_btn.click(
fn=lambda processed_data, current_selector: navigate_measure_view(
processed_data, current_selector, 1
),
inputs=[processed_data_state, measure_view_selector],
outputs=[measure_view_selector, measure_image, measure_points_state],
)
measure_view_selector.change(
fn=lambda processed_data, selector_value: (
update_measure_view(processed_data, int(selector_value.split()[1]) - 1)
if selector_value
else (None, [])
),
inputs=[processed_data_state, measure_view_selector],
outputs=[measure_image, measure_points_state],
)
# -------------------------------------------------------------------------
# Acknowledgement section
# -------------------------------------------------------------------------
gr.HTML(get_acknowledgements_html())
demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
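
# Local usage (a sketch; assumes a CUDA-capable GPU and the mapanything package
# plus its dependencies are installed):
#   python app.py
# Gradio serves on http://127.0.0.1:7860 by default; share=True additionally
# creates a temporary public *.gradio.live link.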