Update app.py
app.py CHANGED
@@ -34,9 +34,8 @@ try:
 except FileNotFoundError:
     print(f"Error: Checkpoint file 'checkpoints/depth_anything_v2_{encoder}.pth' not found.")
     print("Please ensure you have downloaded the Depth Anything V2 model checkpoints")
-    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions.")
-    #
-    # For now, setting model to None to prevent runtime errors if not loaded
+    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions provided earlier.")
+    # Set model to None so a failed load is handled gracefully
     model = None
 except Exception as e:
     print(f"An error occurred while loading the Depth Anything V2 model: {e}")
@@ -46,4 +45,130 @@ except Exception as e:
 def process_image(image, max_disparity_ratio, inpaint_radius):
     """
     Convert a 2D photo to a stereoscopic 3D image pair using Depth Anything V2
-    for depth estimation and DIBR, with adjustable
+    for depth estimation and DIBR, with adjustable parameters.
+    """
+    if model is None:
+        # If the model failed to load, return an error image or message
+        print("Error: Depth Anything V2 model not loaded. Cannot process image.")
+        # Create a blank red image to signal an error in the UI
+        return Image.new('RGB', (200, 200), color='red')
+
+    # Convert the PIL image to a numpy array
+    image_np = np.array(image)
+    height, width = image_np.shape[:2]
+
+    # Convert RGB to BGR for OpenCV compatibility (Depth Anything V2's infer_image expects BGR)
+    image_np_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+
+    # Step 1: Estimate the depth map using Depth Anything V2.
+    # The infer_image method handles preprocessing internally.
+    # The default input_size for Depth Anything V2 is 518, which is generally good.
+    with torch.no_grad():
+        depth_map = model.infer_image(image_np_bgr, input_size=518)  # HxW raw depth map as a numpy array
+
+    # Normalize the depth map to [0, 1].
+    # Depth Anything V2 output ranges can vary, so normalization is key for consistent disparity.
+    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
+
+    # Smooth the depth map to reduce noise (optional, but good for limiting DIBR artifacts).
+    # This helps reduce jagged edges and improves the visual quality of the shifts.
+    depth_map = cv2.GaussianBlur(depth_map, (5, 5), 0)
+
+    # Step 2: Calculate the disparity map.
+    # Disparity is inversely proportional to depth: closer objects get higher disparity.
+    # max_disparity_pixels is calculated as a ratio of the image width.
+    max_disparity_pixels = int(max_disparity_ratio * width)
+    # Invert the depth map because smaller depth values usually mean closer objects,
+    # which should receive larger disparity.
+    disparity_map = max_disparity_pixels * (1 - depth_map)
+
+    # Step 3: Initialize the left and right images and masks for DIBR.
+    # These store the shifted pixels and track unfilled areas.
+    left_image = np.zeros_like(image_np)
+    right_image = np.zeros_like(image_np)
+    # Masks start as True: every target pixel is a "hole" until filled.
+    left_mask = np.ones((height, width), dtype=bool)
+    right_mask = np.ones((height, width), dtype=bool)
+
+    # Step 4: Shift pixels according to disparity (forward warping).
+    # Iterate over each pixel of the original image and place it in the new views.
+    for y in range(height):
+        for x in range(width):
+            disparity = int(disparity_map[y, x])
+
+            # For the left eye, pixels are shifted to the right
+            new_x_left = x + disparity
+            # For the right eye, pixels are shifted to the left
+            new_x_right = x - disparity
+
+            # Place the pixel in the left image if within bounds
+            if 0 <= new_x_left < width:
+                left_image[y, new_x_left] = image_np[y, x]
+                left_mask[y, new_x_left] = False  # Mark this spot as filled
+
+            # Place the pixel in the right image if within bounds
+            if 0 <= new_x_right < width:
+                right_image[y, new_x_right] = image_np[y, x]
+                right_mask[y, new_x_right] = False  # Mark this spot as filled
+
+    # Convert the masks to uint8 (0 or 255) as required by OpenCV's inpainting function
+    left_mask_uint8 = left_mask.astype(np.uint8) * 255
+    right_mask_uint8 = right_mask.astype(np.uint8) * 255
+
+    # Step 5: Apply inpainting to fill holes (disoccluded regions).
+    # cv2.INPAINT_TELEA works well for this purpose.
+    left_image_inpaint = cv2.inpaint(left_image, left_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
+    right_image_inpaint = cv2.inpaint(right_image, right_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
+
+    # Step 6: Combine into a side-by-side stereoscopic image.
+    # This is the standard format for 3D viewing on headsets like the Quest.
+    stereo_image = np.hstack((left_image_inpaint, right_image_inpaint))
+
+    # Convert back to a PIL image for Gradio output
+    stereo_image_pil = Image.fromarray(stereo_image)
+    return stereo_image_pil
+
+# Define the Gradio interface for the end-to-end pipeline
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # 2D to Stereoscopic 3D Converter
+        Upload a 2D photo to generate a stereoscopic 3D image pair for viewing on a Quest headset.
+        The output is a side-by-side image: left half for the left eye, right half for the right eye.
+        Adjust the sliders to fine-tune the 3D effect and reduce distortion.
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="pil", label="Upload a 2D Photo")
+            max_disparity_slider = gr.Slider(
+                minimum=0.01,
+                maximum=0.10,
+                value=0.03,  # Default max disparity ratio
+                step=0.005,
+                label="Max Disparity Ratio (controls 3D intensity)",
+                info="Higher values mean a stronger 3D effect, but can cause more distortion."
+            )
+            inpaint_radius_slider = gr.Slider(
+                minimum=1,
+                maximum=20,
+                value=5,  # Default inpainting radius
+                step=1,
+                label="Inpainting Radius (controls hole filling)",
+                info="Larger values fill holes more, but can blur details around shifted objects."
+            )
+            process_button = gr.Button("Convert to 3D")
+        with gr.Column():
+            output_image = gr.Image(type="pil", label="Stereoscopic 3D Output (Side-by-Side)")
+
+    # Connect the button click event to the processing function
+    process_button.click(
+        fn=process_image,
+        inputs=[input_image, max_disparity_slider, inpaint_radius_slider],
+        outputs=output_image
+    )
+
+if __name__ == '__main__':
+    # This block runs when the script is executed directly.
+    # On Hugging Face Spaces, app.py is usually launched by the platform.
+    demo.launch()
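
One edge case in the Step 1 normalization: a photo whose predicted depth is perfectly uniform makes depth_map.max() - depth_map.min() zero, and the division produces NaNs. If that ever shows up in practice, a guard along these lines (a suggested hardening, not part of the committed code) keeps the pipeline defined:

    # Hypothetical guard, not in this commit: avoid division by zero on flat depth maps
    denom = max(depth_map.max() - depth_map.min(), 1e-6)
    depth_map = (depth_map - depth_map.min()) / denom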
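
To make the Step 2 numbers concrete: for a 1920-pixel-wide photo at the default Max Disparity Ratio of 0.03, the code computes max_disparity_pixels = int(0.03 * 1920) = 57. A pixel with normalized depth 0.25 (near the camera after the inversion) is then shifted by int(57 * (1 - 0.25)) = 42 pixels to the right in the left view and 42 pixels to the left in the right view, while the farthest pixels (depth 1.0) are not shifted at all.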
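
The Step 4 warping loop visits every pixel in Python, which can be slow on multi-megapixel uploads. The same forward warp can be expressed as NumPy scatter assignments. The sketch below is a hypothetical helper (the name warp_views is not part of the commit) that reproduces the loop's behavior under the same array shapes used in process_image:

    import numpy as np

    def warp_views(image_np, disparity_map):
        # Vectorized version of the per-pixel loop in process_image:
        # scatter each source pixel into the left/right views by its disparity.
        height, width = image_np.shape[:2]
        ys, xs = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
        d = disparity_map.astype(np.int32)  # same truncation as int(disparity_map[y, x])

        left_image = np.zeros_like(image_np)
        right_image = np.zeros_like(image_np)
        left_mask = np.ones((height, width), dtype=bool)
        right_mask = np.ones((height, width), dtype=bool)

        # Left eye: shift pixels to the right
        new_x = xs + d
        valid = (new_x >= 0) & (new_x < width)
        left_image[ys[valid], new_x[valid]] = image_np[ys[valid], xs[valid]]
        left_mask[ys[valid], new_x[valid]] = False

        # Right eye: shift pixels to the left
        new_x = xs - d
        valid = (new_x >= 0) & (new_x < width)
        right_image[ys[valid], new_x[valid]] = image_np[ys[valid], xs[valid]]
        right_mask[ys[valid], new_x[valid]] = False

        # Where several source pixels land on the same target, NumPy keeps the
        # last assignment in row-major order (the rightmost source pixel in the
        # row), matching the loop's last-write-wins behavior.
        return left_image, right_image, left_mask, right_mask

The loop body could then be replaced by left_image, right_image, left_mask, right_mask = warp_views(image_np, disparity_map), with the mask conversion and inpainting steps unchanged.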