juanmackie committed
Commit d6e1b99 · verified · 1 Parent(s): 0ec340f

Update app.py

Files changed (1)
  1. app.py +129 -4
app.py CHANGED
@@ -34,9 +34,8 @@ try:
 except FileNotFoundError:
     print(f"Error: Checkpoint file 'checkpoints/depth_anything_v2_{encoder}.pth' not found.")
     print("Please ensure you have downloaded the Depth Anything V2 model checkpoints")
-    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions.")
-    # Exit or handle gracefully if the model cannot be loaded
-    # For now, setting model to None to prevent runtime errors if not loaded
+    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions provided earlier.")
+    # Set model to None to gracefully handle if it couldn't be loaded
     model = None
 except Exception as e:
     print(f"An error occurred while loading the Depth Anything V2 model: {e}")
@@ -46,4 +45,130 @@ except Exception as e:
 def process_image(image, max_disparity_ratio, inpaint_radius):
     """
     Convert a 2D photo to a stereoscopic 3D image pair using Depth Anything V2
-    for depth estimation and DIBR, with adjustable paramete
+    for depth estimation and DIBR, with adjustable parameters.
+    """
+    if model is None:
+        # If model failed to load, return an error image or message
+        print("Error: Depth Anything V2 model not loaded. Cannot process image.")
+        # Create a blank red image to signal an error in the UI
+        return Image.new('RGB', (200, 200), color = 'red')
+
+    # Convert PIL image to numpy array
+    image_np = np.array(image)
+    height, width = image_np.shape[:2]
+
+    # Convert RGB to BGR for OpenCV compatibility (Depth Anything V2's infer_image expects BGR)
+    image_np_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+
+    # Step 1: Estimate the depth map using Depth Anything V2
+    # The infer_image method handles preprocessing internally.
+    # The default input_size for Depth Anything V2 is 518, which is generally good.
+    with torch.no_grad():
+        depth_map = model.infer_image(image_np_bgr, input_size=518) # HxW raw depth map in numpy
+
+    # Normalize the depth map to [0,1]
+    # Depth Anything V2 outputs can vary, so normalization is key for consistent disparity.
+    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
+
+    # Smooth the depth map to reduce noise (optional, but good for DIBR artifacts)
+    # This helps in reducing jagged edges and improving the visual quality of shifts.
+    depth_map = cv2.GaussianBlur(depth_map, (5, 5), 0)
+
+    # Step 2: Calculate the disparity map
+    # Disparity is inversely proportional to depth. Closer objects have higher disparity.
+    # max_disparity_pixels is calculated based on a ratio of the image width.
+    max_disparity_pixels = int(max_disparity_ratio * width)
+    # We invert the depth_map because smaller depth values usually mean closer objects
+    # and thus should have larger disparity.
+    disparity_map = max_disparity_pixels * (1 - depth_map)
+
+    # Step 3: Initialize left and right images and masks for DIBR
+    # These will store the shifted pixels and track unfilled areas.
+    left_image = np.zeros_like(image_np)
+    right_image = np.zeros_like(image_np)
+    # Masks are initialized to True (all areas are "holes" initially for the target images)
+    left_mask = np.ones((height, width), dtype=bool)
+    right_mask = np.ones((height, width), dtype=bool)
+
+    # Step 4: Perform pixel shifting based on disparity (forward warping)
+    # Iterate through each pixel of the original image and place it in the new views.
+    for y in range(height):
+        for x in range(width):
+            disparity = int(disparity_map[y, x])
+
+            # For the left eye, pixels are shifted to the right
+            new_x_left = x + disparity
+            # For the right eye, pixels are shifted to the left
+            new_x_right = x - disparity
+
+            # Place pixel in left image if within bounds
+            if 0 <= new_x_left < width:
+                left_image[y, new_x_left] = image_np[y, x]
+                left_mask[y, new_x_left] = False # Mark this spot as filled
+
+            # Place pixel in right image if within bounds
+            if 0 <= new_x_right < width:
+                right_image[y, new_x_right] = image_np[y, x]
+                right_mask[y, new_x_right] = False # Mark this spot as filled
+
+    # Convert masks to uint8 (0 or 255) as required by OpenCV's inpainting function
+    left_mask_uint8 = left_mask.astype(np.uint8) * 255
+    right_mask_uint8 = right_mask.astype(np.uint8) * 255
+
+    # Step 5: Apply inpainting to fill holes (disoccluded regions)
+    # `cv2.INPAINT_TELEA` is a good algorithm for this purpose.
+    left_image_inpaint = cv2.inpaint(left_image, left_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
+    right_image_inpaint = cv2.inpaint(right_image, right_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
+
+    # Step 6: Combine into a side-by-side stereoscopic image
+    # This is the standard format for 3D viewing on headsets like Quest.
+    stereo_image = np.hstack((left_image_inpaint, right_image_inpaint))
+
+    # Convert back to PIL image for output in Gradio
+    stereo_image_pil = Image.fromarray(stereo_image)
+    return stereo_image_pil
+
+# Define Gradio interface for end-to-end pipeline
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # 2D to Stereoscopic 3D Converter
+        Upload a 2D photo to generate a stereoscopic 3D image pair for viewing on a Quest headset.
+        The output is a side-by-side image: left half for the left eye, right half for the right eye.
+        Adjust the sliders to fine-tune the 3D effect and reduce distortion.
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="pil", label="Upload a 2D Photo")
+            max_disparity_slider = gr.Slider(
+                minimum=0.01,
+                maximum=0.10,
+                value=0.03, # Default max disparity ratio
+                step=0.005,
+                label="Max Disparity Ratio (controls 3D intensity)",
+                info="Higher values mean a stronger 3D effect, but can cause more distortion."
+            )
+            inpaint_radius_slider = gr.Slider(
+                minimum=1,
+                maximum=20,
+                value=5, # Default inpainting radius
+                step=1,
+                label="Inpainting Radius (controls hole filling)",
+                info="Larger values fill holes more, but can blur details around shifted objects."
+            )
+            process_button = gr.Button("Convert to 3D")
+        with gr.Column():
+            output_image = gr.Image(type="pil", label="Stereoscopic 3D Output (Side-by-Side)")
+
+    # Connect the button click event to the processing function
+    process_button.click(
+        fn=process_image,
+        inputs=[input_image, max_disparity_slider, inpaint_radius_slider],
+        outputs=output_image
+    )
+
+if __name__ == '__main__':
+    # This part will be executed when you run the script directly.
+    # For Hugging Face Spaces, the `app.py` is usually run by the platform.
+    demo.launch()
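
Note on the Step 4 warping loop: with the default ratio of 0.03 on a 1920-pixel-wide photo, pixels shift by up to int(0.03 * 1920) = 57 px, and the nested Python loop visits every pixel, which gets slow on large images. Below is a minimal vectorized sketch of the same forward warp using NumPy fancy indexing; it is not part of this commit, the helper name forward_warp is illustrative, and like the loop it resolves collisions by write order rather than true depth ordering.

import numpy as np

def forward_warp(image_np, disparity_map):
    # Vectorized equivalent of the commit's per-pixel DIBR shift (illustrative sketch).
    height, width = image_np.shape[:2]
    ys, xs = np.mgrid[0:height, 0:width]  # per-pixel coordinate grids
    d = disparity_map.astype(int)         # integer shifts, matching int(disparity_map[y, x])
    new_x_left = xs + d                   # left eye: pixels shift right
    new_x_right = xs - d                  # right eye: pixels shift left

    left_image = np.zeros_like(image_np)
    right_image = np.zeros_like(image_np)
    left_mask = np.ones((height, width), dtype=bool)   # True = hole, as in the commit
    right_mask = np.ones((height, width), dtype=bool)

    ok = (0 <= new_x_left) & (new_x_left < width)      # in-bounds targets, left view
    left_image[ys[ok], new_x_left[ok]] = image_np[ys[ok], xs[ok]]
    left_mask[ys[ok], new_x_left[ok]] = False

    ok = (0 <= new_x_right) & (new_x_right < width)    # in-bounds targets, right view
    right_image[ys[ok], new_x_right[ok]] = image_np[ys[ok], xs[ok]]
    right_mask[ys[ok], new_x_right[ok]] = False

    return left_image, right_image, left_mask, right_mask

A depth-aware warp would process pixels far-to-near so that closer pixels win occlusions; the scan-order writes here simply mirror the commit's loop.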