Alessio Grancini
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -107,6 +107,115 @@ def model_selector(model_type):
|
|
| 107 |
img_seg = ImageSegmenter(model_type=yolo_model)
|
| 108 |
depth_estimator = MonocularDepthEstimator(model_type=midas_model)
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def cancel():
    """Request cancellation of any in-progress processing.

    Sets the module-level CANCEL_PROCESSING flag, which processing
    loops are expected to poll. The ``global`` declaration is the fix:
    without it the assignment only creates a function-local variable
    and the module flag never changes.
    """
    global CANCEL_PROCESSING
    CANCEL_PROCESSING = True
|
| 112 |
|
|
|
|
| 107 |
img_seg = ImageSegmenter(model_type=yolo_model)
|
| 108 |
depth_estimator = MonocularDepthEstimator(model_type=midas_model)
|
| 109 |
|
| 110 |
+
# START
|
| 111 |
+
# added for lens studio
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def get_box_vertices(bbox):
    """Return the four corner points of a bounding box.

    Expands an (x1, y1, x2, y2) box into its corners, ordered
    clockwise starting from the top-left.
    """
    left, top, right, bottom = bbox
    corners = [
        [left, top],      # top-left
        [right, top],     # top-right
        [right, bottom],  # bottom-right
        [left, bottom],   # bottom-left
    ]
    return corners
|
| 123 |
+
|
| 124 |
+
def depth_at_center(depth_map, bbox):
    """Median depth sampled around the center of a bounding box.

    Parameters
    ----------
    depth_map : np.ndarray
        2-D depth image indexed as [row, col].
    bbox : sequence of 4 numbers
        (x1, y1, x2, y2) box, expected in depth-map pixel coordinates.

    Returns
    -------
    Median depth over an (up to) 5x5 window centered on the box
    center; the median makes the sample robust to single-pixel noise.

    Fix: the center is clamped into the map bounds so the sampled
    window is never empty — previously a box whose center fell outside
    the depth map produced an empty slice and ``np.median`` returned
    NaN with a runtime warning.
    """
    x1, y1, x2, y2 = bbox
    center_x = min(max(int((x1 + x2) / 2), 0), depth_map.shape[1] - 1)
    center_y = min(max(int((y1 + y2) / 2), 0), depth_map.shape[0] - 1)

    # Sample a small region around the center for stability
    region = depth_map[
        max(0, center_y - 2):min(depth_map.shape[0], center_y + 3),
        max(0, center_x - 2):min(depth_map.shape[1], center_x + 3),
    ]
    return np.median(region)
|
| 136 |
+
|
| 137 |
+
def get_camera_matrix(depth_estimator):
    """Expose the depth estimator's pinhole intrinsics as a dict.

    Reads the focal lengths (fx, fy) and principal point (cx, cy)
    from the estimator and returns them keyed by their conventional
    names.
    """
    intrinsics = {}
    intrinsics["fx"] = depth_estimator.fx_depth
    intrinsics["fy"] = depth_estimator.fy_depth
    intrinsics["cx"] = depth_estimator.cx_depth
    intrinsics["cy"] = depth_estimator.cy_depth
    return intrinsics
|
| 145 |
+
|
| 146 |
+
@spaces.GPU
def get_detection_data(image):
    """
    Run segmentation and depth estimation on an image and return a
    JSON-serializable dict of detections plus camera parameters.

    Parameters
    ----------
    image : np.ndarray
        Input image (H x W x C); resized via ``utils.resize`` first.

    Returns
    -------
    dict
        Keys: "detections" (list of per-object dicts), "depth_map"
        (nested lists), "camera_params" (intrinsics dict), and
        "image_size" ({"width", "height"}).

    Raises
    ------
    Re-raises any exception from the underlying models after logging.

    Fixes vs. the original: the undefined ``get_object_bbox`` call is
    replaced by the ``_get_object_bbox`` helper below, and the bbox is
    converted into depth-map pixel coordinates (using the previously
    unused scale factors) before sampling the depth.
    """
    try:
        # Resize image if needed
        image = utils.resize(image)

        # Run detections
        image_segmentation, objects_data = img_seg.predict(image)
        depthmap, depth_colormap = depth_estimator.make_prediction(image)

        # Original image dimensions
        height, width = image.shape[:2]

        # Image -> depth-map scale (the depth map may be rendered at a
        # different resolution than the input image)
        scale_x = width / depthmap.shape[1]
        scale_y = height / depthmap.shape[0]

        detections = []
        for obj in objects_data:
            cls_id, category, center, mask, color = obj

            # Derive the bounding box from the segmentation mask
            bbox = _get_object_bbox(mask)

            # Normalized (0..1) coordinates for resolution independence
            bbox_norm = [
                bbox[0] / width,
                bbox[1] / height,
                bbox[2] / width,
                bbox[3] / height,
            ]

            vertices = get_box_vertices(bbox_norm)

            # The bbox is in image coordinates; divide by the
            # image->depthmap scale so the depth is sampled at the
            # matching depth-map pixel (previously the unscaled bbox
            # was used, which is wrong whenever the sizes differ).
            bbox_depth = [
                bbox[0] / scale_x,
                bbox[1] / scale_y,
                bbox[2] / scale_x,
                bbox[3] / scale_y,
            ]
            depth_value = depth_at_center(depthmap, bbox_depth)

            detections.append({
                "category": category,
                "confidence": 1.0,  # TODO: plumb through the real score if the segmenter exposes it
                "bbox": bbox_norm,
                "depth": float(depth_value),  # native float for JSON serialization
                "vertices": vertices,
                "color": [float(c / 255) for c in color],  # normalize to 0..1
                "mask": mask.tolist() if isinstance(mask, np.ndarray) else mask,
            })

        return {
            "detections": detections,
            "depth_map": depthmap.tolist(),
            "camera_params": get_camera_matrix(depth_estimator),
            "image_size": {
                "width": width,
                "height": height,
            },
        }

    except Exception as e:
        print(f"Error in get_detection_data: {str(e)}")
        raise


def _get_object_bbox(mask):
    """Axis-aligned (x1, y1, x2, y2) box around the nonzero mask pixels.

    NOTE(review): assumes *mask* is a 2-D binary array in image pixel
    coordinates — confirm against ImageSegmenter's actual mask format.
    Returns a zero box when the mask is empty.
    """
    ys, xs = np.nonzero(np.asarray(mask))
    if xs.size == 0:
        return [0, 0, 0, 0]
    return [int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())]
|
| 216 |
+
|
| 217 |
+
# ENDS
|
| 218 |
+
|
| 219 |
def cancel():
    """Request cancellation of any in-progress processing.

    Sets the module-level CANCEL_PROCESSING flag, which processing
    loops are expected to poll. The ``global`` declaration is the fix:
    without it the assignment only creates a function-local variable
    and the module flag never changes.
    """
    global CANCEL_PROCESSING
    CANCEL_PROCESSING = True
|
| 221 |
|