text-to-3d-2.0

Starting

App Files Files Community

jbilcke-hf committed on Jan 20

Commit

c882a68

verified ·

1 Parent(s): e02679c

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +44 -28

gradio_app.py CHANGED Viewed

@@ -47,38 +47,54 @@ def create_rgba_image(rgb_image: Image.Image, mask: np.ndarray = None) -> Image.
     """Create an RGBA image from RGB image and optional mask."""
     rgba_image = rgb_image.convert('RGBA')
     if mask is not None:
-        # Convert mask to alpha channel format
         alpha = Image.fromarray((mask * 255).astype(np.uint8))
         rgba_image.putalpha(alpha)
     return rgba_image
 def create_batch(input_image: Image.Image) -> dict[str, Any]:
-   """Prepare image batch for model input."""
-   # Ensure input is RGBA
-   if input_image.mode != 'RGBA':
-       input_image = input_image.convert('RGBA')
-   # Resize and convert to numpy array
-   resized_image = input_image.resize((COND_WIDTH, COND_HEIGHT))
-   img_array = np.array(resized_image).astype(np.float32) / 255.0
-   # Split into RGB and alpha
-   mask_cond = img_array[..., 3:4]  # Alpha channel
-   # Blend RGB with background based on alpha
-   rgb_cond = np.clip(
-       img_array[..., :3] * mask_cond + BACKGROUND_COLOR * (1 - mask_cond),
-       0,
-       1
-   )
-   batch = {
-       "rgb_cond": torch.from_numpy(rgb_cond).unsqueeze(0),
-       "mask_cond": torch.from_numpy(mask_cond).unsqueeze(0),
-       "c2w_cond": c2w_cond.unsqueeze(0),
-       "intrinsic_cond": intrinsic.unsqueeze(0),
-       "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
-   }
-   return batch
 def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024) -> tuple[str | None, Image.Image | None]:
     """Generate image from prompt and convert to 3D model."""

     """Create an RGBA image from RGB image and optional mask."""
     rgba_image = rgb_image.convert('RGBA')
     if mask is not None:
+        print("[debug] mask shape before alpha:", mask.shape)
+        # Ensure mask is 2D before converting to alpha
+        if len(mask.shape) > 2:
+            mask = mask.squeeze()
         alpha = Image.fromarray((mask * 255).astype(np.uint8))
+        print("[debug] alpha size:", alpha.size)
         rgba_image.putalpha(alpha)
     return rgba_image
 def create_batch(input_image: Image.Image) -> dict[str, Any]:
+    """Prepare image batch for model input."""
+    # Ensure input is RGBA
+    if input_image.mode != 'RGBA':
+        input_image = input_image.convert('RGBA')
+    # Resize and convert to numpy array
+    resized_image = input_image.resize((COND_WIDTH, COND_HEIGHT))
+    img_array = np.array(resized_image).astype(np.float32) / 255.0
+    print("[debug] img_array shape:", img_array.shape)
+    # Split into RGB and alpha
+    rgb = torch.from_numpy(img_array[..., :3]).float()
+    alpha = torch.from_numpy(img_array[..., 3:4]).float()
+    print("[debug] rgb tensor shape:", rgb.shape)
+    print("[debug] alpha tensor shape:", alpha.shape)
+    # Create background blend using torch.lerp()
+    bg_tensor = torch.tensor(BACKGROUND_COLOR)[None, None, :]
+    print("[debug] bg_tensor shape:", bg_tensor.shape)
+    rgb_cond = torch.lerp(bg_tensor, rgb, alpha)
+    print("[debug] rgb_cond shape:", rgb_cond.shape)
+    batch = {
+        "rgb_cond": rgb_cond.unsqueeze(0),
+        "mask_cond": alpha.unsqueeze(0),
+        "c2w_cond": c2w_cond.unsqueeze(0),
+        "intrinsic_cond": intrinsic.unsqueeze(0),
+        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
+    }
+    # Final shapes check
+    for k, v in batch.items():
+        print(f"[debug] {k} final shape:", v.shape)
+    return batch
 def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024) -> tuple[str | None, Image.Image | None]:
     """Generate image from prompt and convert to 3D model."""