Spaces:

lch01
/

StreamVGGT

Running on Zero

lch01 committed on Jul 18

Commit

0c61197

1 Parent(s): dad59d0

fix output of inference

Files changed (1) hide show

app.py CHANGED Viewed

@@ -85,11 +85,35 @@ def run_model(target_dir, model) -> dict:
     with torch.no_grad():
         with torch.cuda.amp.autocast(dtype=dtype):
-            predictions = model.inference(frames)
     # Convert pose encoding to extrinsic and intrinsic matrices
     print("Converting pose encoding to extrinsic and intrinsic matrices...")
-    extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["pose_enc"], images.shape[-2:])
     predictions["extrinsic"] = extrinsic
     predictions["intrinsic"] = intrinsic

     with torch.no_grad():
         with torch.cuda.amp.autocast(dtype=dtype):
+            output = model.inference(frames)
+    predictions = {}
+    all_pts3d = []
+    all_conf = []
+    all_depth = []
+    all_depth_conf = []
+    all_camera_pose = []
+    for res in output.ress:
+        all_pts3d.append(res['pts3d_in_other_view'])
+        all_conf.append(res['conf'])
+        all_depth.append(res['depth'])
+        all_depth_conf.append(res['depth_conf'])
+        all_camera_pose.append(res['camera_pose'])
+    predictions["world_points"] = torch.stack(all_pts3d, dim=0)  # (S, H, W, 3)
+    predictions["world_points_conf"] = torch.stack(all_conf, dim=0)  # (S, H, W)
+    predictions["depth"] = torch.stack(all_depth, dim=0)  # (S, H, W, 1)
+    predictions["depth_conf"] = torch.stack(all_depth_conf, dim=0)  # (S, H, W)
+    predictions["pose_enc"] = torch.stack(all_camera_pose, dim=0)  # (S, 9)
+    predictions["images"] = images.unsqueeze(0)  # (1, S, 3, H, W)
     # Convert pose encoding to extrinsic and intrinsic matrices
     print("Converting pose encoding to extrinsic and intrinsic matrices...")
+    extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["camera_pose"], images.shape[-2:])
     predictions["extrinsic"] = extrinsic
     predictions["intrinsic"] = intrinsic