Update app.py
app.py CHANGED
@@ -34,9 +34,8 @@ try:
 except FileNotFoundError:
     print(f"Error: Checkpoint file 'checkpoints/depth_anything_v2_{encoder}.pth' not found.")
     print("Please ensure you have downloaded the Depth Anything V2 model checkpoints")
-    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions.")
-    #
-    # For now, setting model to None to prevent runtime errors if not loaded
+    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions provided earlier.")
+    # Set model to None so a failed load is handled gracefully
     model = None
 except Exception as e:
     print(f"An error occurred while loading the Depth Anything V2 model: {e}")
@@ -46,4 +45,130 @@ except Exception as e:
 def process_image(image, max_disparity_ratio, inpaint_radius):
     """
     Convert a 2D photo to a stereoscopic 3D image pair using Depth Anything V2
-    for depth estimation and DIBR, with adjustable
+    for depth estimation and DIBR, with adjustable parameters.
+    """
+    if model is None:
+        # If the model failed to load, return an error image or message
+        print("Error: Depth Anything V2 model not loaded. Cannot process image.")
+        # Create a blank red image to signal an error in the UI
+        return Image.new('RGB', (200, 200), color='red')
+
+    # Convert the PIL image to a numpy array
+    image_np = np.array(image)
+    height, width = image_np.shape[:2]
+
+    # Convert RGB to BGR for OpenCV compatibility (Depth Anything V2's infer_image expects BGR)
+    image_np_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+
+    # Step 1: Estimate the depth map using Depth Anything V2.
+    # The infer_image method handles preprocessing internally.
+    # The default input_size for Depth Anything V2 is 518, which is generally good.
+    with torch.no_grad():
+        depth_map = model.infer_image(image_np_bgr, input_size=518)  # HxW raw depth map as a numpy array
+
+    # Normalize the depth map to [0, 1].
+    # Depth Anything V2 output ranges can vary, so normalization is key for consistent disparity.
+    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
+
+    # Smooth the depth map to reduce noise (optional, but good for limiting DIBR artifacts).
+    # This helps reduce jagged edges and improves the visual quality of the shifts.
+    depth_map = cv2.GaussianBlur(depth_map, (5, 5), 0)
+
+    # Step 2: Calculate the disparity map.
+    # Disparity is inversely proportional to depth: closer objects get higher disparity.
+    # max_disparity_pixels is calculated as a ratio of the image width.
+    max_disparity_pixels = int(max_disparity_ratio * width)
+    # Invert the depth map because smaller depth values usually mean closer objects,
+    # which should receive larger disparity.
+    disparity_map = max_disparity_pixels * (1 - depth_map)
+
+    # Step 3: Initialize the left and right images and masks for DIBR.
+    # These store the shifted pixels and track unfilled areas.
+    left_image = np.zeros_like(image_np)
+    right_image = np.zeros_like(image_np)
+    # Masks start as True: every target pixel is a "hole" until filled.
+    left_mask = np.ones((height, width), dtype=bool)
+    right_mask = np.ones((height, width), dtype=bool)
+
+    # Step 4: Shift pixels according to disparity (forward warping).
+    # Iterate over each pixel of the original image and place it in the new views.
+    for y in range(height):
+        for x in range(width):
+            disparity = int(disparity_map[y, x])
+
+            # For the left eye, pixels are shifted to the right
+            new_x_left = x + disparity
+            # For the right eye, pixels are shifted to the left
+            new_x_right = x - disparity
+
+            # Place the pixel in the left image if within bounds
+            if 0 <= new_x_left < width:
+                left_image[y, new_x_left] = image_np[y, x]
+                left_mask[y, new_x_left] = False  # Mark this spot as filled
+
+            # Place the pixel in the right image if within bounds
+            if 0 <= new_x_right < width:
+                right_image[y, new_x_right] = image_np[y, x]
+                right_mask[y, new_x_right] = False  # Mark this spot as filled
+
+    # Convert the masks to uint8 (0 or 255) as required by OpenCV's inpainting function
+    left_mask_uint8 = left_mask.astype(np.uint8) * 255
+    right_mask_uint8 = right_mask.astype(np.uint8) * 255
+
+    # Step 5: Apply inpainting to fill holes (disoccluded regions).
+    # cv2.INPAINT_TELEA works well for this purpose.
+    left_image_inpaint = cv2.inpaint(left_image, left_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
+    right_image_inpaint = cv2.inpaint(right_image, right_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
+
+    # Step 6: Combine into a side-by-side stereoscopic image.
+    # This is the standard format for 3D viewing on headsets like the Quest.
+    stereo_image = np.hstack((left_image_inpaint, right_image_inpaint))
+
+    # Convert back to a PIL image for Gradio output
+    stereo_image_pil = Image.fromarray(stereo_image)
+    return stereo_image_pil
+
+# Define the Gradio interface for the end-to-end pipeline
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # 2D to Stereoscopic 3D Converter
+        Upload a 2D photo to generate a stereoscopic 3D image pair for viewing on a Quest headset.
+        The output is a side-by-side image: left half for the left eye, right half for the right eye.
+        Adjust the sliders to fine-tune the 3D effect and reduce distortion.
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="pil", label="Upload a 2D Photo")
+            max_disparity_slider = gr.Slider(
+                minimum=0.01,
+                maximum=0.10,
+                value=0.03,  # Default max disparity ratio
+                step=0.005,
+                label="Max Disparity Ratio (controls 3D intensity)",
+                info="Higher values mean a stronger 3D effect, but can cause more distortion."
+            )
+            inpaint_radius_slider = gr.Slider(
+                minimum=1,
+                maximum=20,
+                value=5,  # Default inpainting radius
+                step=1,
+                label="Inpainting Radius (controls hole filling)",
+                info="Larger values fill holes more, but can blur details around shifted objects."
+            )
+            process_button = gr.Button("Convert to 3D")
+        with gr.Column():
+            output_image = gr.Image(type="pil", label="Stereoscopic 3D Output (Side-by-Side)")
+
+    # Connect the button click event to the processing function
+    process_button.click(
+        fn=process_image,
+        inputs=[input_image, max_disparity_slider, inpaint_radius_slider],
+        outputs=output_image
+    )
+
+if __name__ == '__main__':
+    # This block runs when the script is executed directly.
+    # On Hugging Face Spaces, app.py is usually launched by the platform.
+    demo.launch()
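
One edge case in the Step 1 normalization: a photo whose predicted depth is perfectly uniform makes depth_map.max() - depth_map.min() zero, and the division produces NaNs. If that ever shows up in practice, a guard along these lines (a suggested hardening, not part of the committed code) keeps the pipeline defined:

    # Hypothetical guard, not in this commit: avoid division by zero on flat depth maps
    denom = max(depth_map.max() - depth_map.min(), 1e-6)
    depth_map = (depth_map - depth_map.min()) / denom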
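
To make the Step 2 numbers concrete: for a 1920-pixel-wide photo at the default Max Disparity Ratio of 0.03, the code computes max_disparity_pixels = int(0.03 * 1920) = 57. A pixel with normalized depth 0.25 (near the camera after the inversion) is then shifted by int(57 * (1 - 0.25)) = 42 pixels to the right in the left view and 42 pixels to the left in the right view, while the farthest pixels (depth 1.0) are not shifted at all.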
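
The Step 4 warping loop visits every pixel in Python, which can be slow on multi-megapixel uploads. The same forward warp can be expressed as NumPy scatter assignments. The sketch below is a hypothetical helper (the name warp_views is not part of the commit) that reproduces the loop's behavior under the same array shapes used in process_image:

    import numpy as np

    def warp_views(image_np, disparity_map):
        # Vectorized version of the per-pixel loop in process_image:
        # scatter each source pixel into the left/right views by its disparity.
        height, width = image_np.shape[:2]
        ys, xs = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
        d = disparity_map.astype(np.int32)  # same truncation as int(disparity_map[y, x])

        left_image = np.zeros_like(image_np)
        right_image = np.zeros_like(image_np)
        left_mask = np.ones((height, width), dtype=bool)
        right_mask = np.ones((height, width), dtype=bool)

        # Left eye: shift pixels to the right
        new_x = xs + d
        valid = (new_x >= 0) & (new_x < width)
        left_image[ys[valid], new_x[valid]] = image_np[ys[valid], xs[valid]]
        left_mask[ys[valid], new_x[valid]] = False

        # Right eye: shift pixels to the left
        new_x = xs - d
        valid = (new_x >= 0) & (new_x < width)
        right_image[ys[valid], new_x[valid]] = image_np[ys[valid], xs[valid]]
        right_mask[ys[valid], new_x[valid]] = False

        # Where several source pixels land on the same target, NumPy keeps the
        # last assignment in row-major order (the rightmost source pixel in the
        # row), matching the loop's last-write-wins behavior.
        return left_image, right_image, left_mask, right_mask

The loop body could then be replaced by left_image, right_image, left_mask, right_mask = warp_views(image_np, disparity_map), with the mask conversion and inpainting steps unchanged.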