Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

xinjie.wang committed on Jun 10

Commit

22e4e0c

1 Parent(s): 07dcc27

update

Files changed (4) hide show

common.py CHANGED Viewed

@@ -165,7 +165,7 @@ if os.getenv("GRADIO_APP") == "imageto3d":
     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
     PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-        "jetx/trellis-image-large"
     )
     # PIPELINE.cuda()
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
@@ -179,7 +179,7 @@ elif os.getenv("GRADIO_APP") == "textto3d":
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-        "jetx/trellis-image-large"
     )
     # PIPELINE.cuda()
     text_model_dir = "weights/Kolors"
@@ -671,6 +671,7 @@ def text2image_fn(
     image_wh: int | tuple[int, int] = [1024, 1024],
     rmbg_tag: str = "rembg",
     n_sample: int = 3,
     req: gr.Request = None,
 ):
     if isinstance(image_wh, int):
@@ -692,6 +693,7 @@ def text2image_fn(
         ip_image=ip_image,
         image_wh=image_wh,
         infer_step=infer_step,
     )
     for idx in range(len(images)):

     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
     PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
+        "microsoft/TRELLIS-image-large"
     )
     # PIPELINE.cuda()
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
+        "microsoft/TRELLIS-image-large"
     )
     # PIPELINE.cuda()
     text_model_dir = "weights/Kolors"
     image_wh: int | tuple[int, int] = [1024, 1024],
     rmbg_tag: str = "rembg",
     n_sample: int = 3,
+    seed: int = None,
     req: gr.Request = None,
 ):
     if isinstance(image_wh, int):
         ip_image=ip_image,
         image_wh=image_wh,
         infer_step=infer_step,
+        seed=seed,
     )
     for idx in range(len(images)):

embodied_gen/models/text_model.py CHANGED Viewed

@@ -18,6 +18,8 @@
 import logging
 import torch
 from diffusers import (
     AutoencoderKL,
     EulerDiscreteScheduler,
@@ -138,11 +140,18 @@ def text2img_gen(
     image_wh: tuple[int, int] = [1024, 1024],
     infer_step: int = 50,
     ip_image_size: int = 512,
 ) -> list[Image.Image]:
     prompt = "Single " + prompt + ", in the center of the image"
     prompt += ", high quality, high resolution, best quality, white background, 3D style,"  # noqa
     logger.info(f"Processing prompt: {prompt}")
     kwargs = dict(
         prompt=prompt,
         height=image_wh[1],
@@ -150,6 +159,7 @@ def text2img_gen(
         num_inference_steps=infer_step,
         guidance_scale=guidance_scale,
         num_images_per_prompt=n_sample,
     )
     if ip_image is not None:
         if isinstance(ip_image, str):

 import logging
 import torch
+import numpy as np
+import random
 from diffusers import (
     AutoencoderKL,
     EulerDiscreteScheduler,
     image_wh: tuple[int, int] = [1024, 1024],
     infer_step: int = 50,
     ip_image_size: int = 512,
+    seed: int = None,
 ) -> list[Image.Image]:
     prompt = "Single " + prompt + ", in the center of the image"
     prompt += ", high quality, high resolution, best quality, white background, 3D style,"  # noqa
     logger.info(f"Processing prompt: {prompt}")
+    if seed is not None:
+        generator = torch.Generator(pipeline.device).manual_seed(seed)
+        torch.manual_seed(seed)
+        np.random.seed(seed)
+        random.seed(seed)
     kwargs = dict(
         prompt=prompt,
         height=image_wh[1],
         num_inference_steps=infer_step,
         guidance_scale=guidance_scale,
         num_images_per_prompt=n_sample,
+        generator=generator,
     )
     if ip_image is not None:
         if isinstance(ip_image, str):

embodied_gen/scripts/imageto3d.py CHANGED Viewed

@@ -70,7 +70,7 @@ IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
 RBG_REMOVER = RembgRemover()
 RBG14_REMOVER = BMGG14Remover()
 SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
-PIPELINE = TrellisImageTo3DPipeline.from_pretrained("jetx/trellis-image-large")
 PIPELINE.cuda()
 SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
 GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)

 RBG_REMOVER = RembgRemover()
 RBG14_REMOVER = BMGG14Remover()
 SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
+PIPELINE = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
 PIPELINE.cuda()
 SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
 GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)

embodied_gen/scripts/text2image.py CHANGED Viewed

@@ -82,6 +82,11 @@ def parse_args():
         type=int,
         default=50,
     )
     args = parser.parse_args()
     return args
@@ -143,6 +148,7 @@ def entrypoint(
             ip_image=ip_img_path,
             image_wh=[args.resolution, args.resolution],
             infer_step=args.infer_step,
         )
         save_paths = []

         type=int,
         default=50,
     )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=0,
+    )
     args = parser.parse_args()
     return args
             ip_image=ip_img_path,
             image_wh=[args.resolution, args.resolution],
             infer_step=args.infer_step,
+            seed=args.seed,
         )
         save_paths = []