Spaces: lch01/StreamVGGT

Running on Zero

lch01 committed on Jul 18

Commit

87232d8

1 Parent(s): 181e6e9

fix output of inference

Files changed (1) hide show

app.py CHANGED Viewed

@@ -71,6 +71,9 @@ def run_model(target_dir, model) -> dict:
     images = load_and_preprocess_images(image_names).to(device)
     print(f"Preprocessed images shape: {images.shape}")
     frames = []
     for i in range(images.shape[0]):
         image = images[i].unsqueeze(0)
@@ -86,9 +89,7 @@ def run_model(target_dir, model) -> dict:
     with torch.no_grad():
         with torch.cuda.amp.autocast(dtype=dtype):
             output = model.inference(frames)
-    predictions = {}
     all_pts3d = []
     all_conf = []
     all_depth = []
@@ -108,13 +109,11 @@ def run_model(target_dir, model) -> dict:
     predictions["depth_conf"] = torch.stack(all_depth_conf, dim=0)  # (S, H, W)
     predictions["pose_enc"] = torch.stack(all_camera_pose, dim=0)  # (S, 9)
-    predictions["images"] = images.unsqueeze(0)  # (1, S, 3, H, W)
-    print("World points shape:", predictions["world_points"].shape)
-    print("World points confidence shape:", predictions["world_points_conf"].shape)
-    print("Depth map shape:", predictions["depth"].shape)
-    print("Depth confidence shape:", predictions["depth_conf"].shape)
-    print("Pose encoding shape:", predictions["pose_enc"].shape)
     # Convert pose encoding to extrinsic and intrinsic matrices
     print("Converting pose encoding to extrinsic and intrinsic matrices...")

     images = load_and_preprocess_images(image_names).to(device)
     print(f"Preprocessed images shape: {images.shape}")
+    predictions = {}
+    predictions["images"] = images  # (S, 3, H, W)
     frames = []
     for i in range(images.shape[0]):
         image = images[i].unsqueeze(0)
     with torch.no_grad():
         with torch.cuda.amp.autocast(dtype=dtype):
             output = model.inference(frames)
     all_pts3d = []
     all_conf = []
     all_depth = []
     predictions["depth_conf"] = torch.stack(all_depth_conf, dim=0)  # (S, H, W)
     predictions["pose_enc"] = torch.stack(all_camera_pose, dim=0)  # (S, 9)
+    #print("World points shape:", predictions["world_points"].shape)
+    #print("World points confidence shape:", predictions["world_points_conf"].shape)
+    #print("Depth map shape:", predictions["depth"].shape)
+    #print("Depth confidence shape:", predictions["depth_conf"].shape)
+    #print("Pose encoding shape:", predictions["pose_enc"].shape)
     # Convert pose encoding to extrinsic and intrinsic matrices
     print("Converting pose encoding to extrinsic and intrinsic matrices...")