Update gradio_app.py
gradio_app.py  CHANGED  (+26 -11)
@@ -71,26 +71,30 @@ def create_batch(input_image: Image.Image) -> dict[str, Any]:
     rgb = img_array
     mask = np.ones((*img_array.shape[:2], 1), dtype=np.float32)

-    # Convert to tensors
-    rgb = torch.from_numpy(rgb).float()
-    mask = torch.from_numpy(mask).float()
+    # Convert to tensors and keep in channel-last format initially
+    rgb = torch.from_numpy(rgb).float()  # [H, W, 3]
+    mask = torch.from_numpy(mask).float()  # [H, W, 1]
     print("[debug] rgb tensor shape:", rgb.shape)
     print("[debug] mask tensor shape:", mask.shape)

     # Create background blend
-    bg_tensor = torch.tensor(BACKGROUND_COLOR)[ …
+    bg_tensor = torch.tensor(BACKGROUND_COLOR)  # [3]
     print("[debug] bg_tensor shape:", bg_tensor.shape)

     # Blend RGB with background using mask
-    rgb_cond = torch.lerp(
-    …
+    rgb_cond = torch.lerp(
+        bg_tensor.view(1, 1, 3),  # [1, 1, 3]
+        rgb,  # [H, W, 3]
+        mask  # [H, W, 1]
+    )
+    print("[debug] rgb_cond shape after blend:", rgb_cond.shape)

-    # Permute the tensors to …
-    rgb_cond = …
-    mask = …
+    # Permute the tensors to [B, C, H, W] format at the end
+    rgb_cond = rgb_cond.permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
+    mask = mask.permute(2, 0, 1).unsqueeze(0)  # [1, 1, H, W]

-    print("[debug] rgb_cond …
-    print("[debug] mask …
+    print("[debug] rgb_cond final shape:", rgb_cond.shape)
+    print("[debug] mask final shape:", mask.shape)

     batch = {
         "rgb_cond": rgb_cond,
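The hunk above builds rgb_cond in channel-last [H, W, C] layout, blends it against the background with torch.lerp, and only permutes to [B, C, H, W] at the very end. The standalone sketch below reproduces that shape flow with dummy data; the BACKGROUND_COLOR value and the 64x64 resolution are placeholder assumptions, not values taken from gradio_app.py. torch.lerp(start, end, weight) returns start + weight * (end - start), so mask == 1 keeps the image pixel and mask == 0 falls back to the background color.

import torch

BACKGROUND_COLOR = (0.5, 0.5, 0.5)  # placeholder value for this sketch
H = W = 64                          # placeholder resolution

rgb = torch.rand(H, W, 3)                   # channel-last RGB in [0, 1]
mask = torch.ones(H, W, 1)                  # foreground mask (1 = keep pixel)
bg_tensor = torch.tensor(BACKGROUND_COLOR)  # [3]

# Broadcasted blend: [1, 1, 3], [H, W, 3], [H, W, 1] -> [H, W, 3]
rgb_cond = torch.lerp(bg_tensor.view(1, 1, 3), rgb, mask)

# Permute to [B, C, H, W] only at the end, as in the patched create_batch
rgb_cond = rgb_cond.permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
mask = mask.permute(2, 0, 1).unsqueeze(0)          # [1, 1, H, W]

print(rgb_cond.shape, mask.shape)
# torch.Size([1, 3, 64, 64]) torch.Size([1, 1, 64, 64])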
@@ -109,6 +113,17 @@ def create_batch(input_image: Image.Image) -> dict[str, Any]:
 def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
     """Process batch through model and generate point cloud."""
     print("[debug] Starting forward_model")
+    print("[debug] Input rgb_cond shape:", batch["rgb_cond"].shape)
+
+    # Ensure input is in correct format [B, C, H, W]
+    if batch["rgb_cond"].shape[1] != 3:
+        batch["rgb_cond"] = batch["rgb_cond"].permute(0, 3, 1, 2)
+    if batch["mask_cond"].shape[1] != 1:
+        batch["mask_cond"] = batch["mask_cond"].permute(0, 3, 1, 2)
+
+    print("[debug] Processed rgb_cond shape:", batch["rgb_cond"].shape)
+    print("[debug] Processed mask_cond shape:", batch["mask_cond"].shape)
+
     batch_size = batch["rgb_cond"].shape[0]

     # Generate point cloud tokens
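The guard added to forward_model treats dimension 1 as the channel axis: if rgb_cond does not have 3 channels there (or mask_cond does not have 1), the batch is assumed to be channel-last and is permuted to [B, C, H, W]. Below is a minimal sketch of that check with made-up shapes; it is an illustration, not part of the commit.

import torch

# Dummy channel-last batch; the 64x64 resolution is made up for this sketch.
batch = {
    "rgb_cond": torch.rand(1, 64, 64, 3),
    "mask_cond": torch.ones(1, 64, 64, 1),
}

# Same guard as in forward_model: permute [B, H, W, C] -> [B, C, H, W] if needed.
if batch["rgb_cond"].shape[1] != 3:
    batch["rgb_cond"] = batch["rgb_cond"].permute(0, 3, 1, 2)
if batch["mask_cond"].shape[1] != 1:
    batch["mask_cond"] = batch["mask_cond"].permute(0, 3, 1, 2)

print(batch["rgb_cond"].shape)   # torch.Size([1, 3, 64, 64])
print(batch["mask_cond"].shape)  # torch.Size([1, 1, 64, 64])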