Spaces: tencent/Hunyuan3D-2mv (Running on Zero)
Merge branch 'main' of https://huggingface.co/spaces/tencent/Hunyuan3D-2mv
Changed files:
- gradio_app.py +74 -30
- hy3dgen/shapegen/__init__.py +1 -1
- hy3dgen/shapegen/models/__init__.py +1 -1
- hy3dgen/shapegen/models/conditioner.py +12 -104
- hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py +3 -12
- hy3dgen/shapegen/pipelines.py +65 -181
- hy3dgen/shapegen/postprocessors.py +1 -4
- hy3dgen/shapegen/preprocessors.py +6 -55
gradio_app.py
CHANGED
@@ -1,10 +1,23 @@
+# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
+# except for the third-party components listed below.
+# Hunyuan 3D does not impose any additional limitations beyond what is outlined
+# in the repsective licenses of these third-party components.
+# Users must comply with all terms and conditions of original licenses of these third-party
+# components and must ensure that the usage of the third party components adheres to
+# all relevant laws and regulations.
+
+# For avoidance of doubts, Hunyuan 3D means the large language models and
+# their software and algorithms, including trained model weights, parameters (including
+# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
+# fine-tuning enabling code and other elements of the foregoing made publicly available
+# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+
 import os
 import random
 import shutil
 import time
 from glob import glob
 from pathlib import Path
-import uuid
 
 import gradio as gr
 import torch
@@ -12,6 +25,7 @@ import trimesh
 import uvicorn
 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
+import uuid
 
 from hy3dgen.shapegen.utils import logger
 
@@ -28,6 +42,7 @@ if True:
     print('install custom')
     subprocess.run(shlex.split("pip install custom_rasterizer-0.1-cp310-cp310-linux_x86_64.whl"), check=True)
 
+
 def get_example_img_list():
     print('Loading example img list ...')
     return sorted(glob('./assets/example_images/**/*.png', recursive=True))
@@ -47,7 +62,7 @@ def get_example_mv_list():
     root = './assets/example_mv_images'
     for mv_dir in os.listdir(root):
         view_list = []
-        for view in ['
+        for view in ['front', 'back', 'left', 'right']:
             path = os.path.join(root, mv_dir, f'{view}.png')
             if os.path.exists(path):
                 view_list.append(path)
@@ -57,18 +72,6 @@ def get_example_mv_list():
     return mv_list
 
 
-# def gen_save_folder(max_size=60):
-#     os.makedirs(SAVE_DIR, exist_ok=True)
-#     exists = set(int(_) for _ in os.listdir(SAVE_DIR) if _.isdigit())
-#     cur_id = min(set(range(max_size)) - exists) if len(exists) < max_size else -1
-#     if os.path.exists(f"{SAVE_DIR}/{(cur_id + 1) % max_size}"):
-#         shutil.rmtree(f"{SAVE_DIR}/{(cur_id + 1) % max_size}")
-#         print(f"remove {SAVE_DIR}/{(cur_id + 1) % max_size} success !!!")
-#     save_folder = f"{SAVE_DIR}/{max(0, cur_id)}"
-#     os.makedirs(save_folder, exist_ok=True)
-#     print(f"mkdir {save_folder} suceess !!!")
-#     return save_folder
-
 def gen_save_folder(max_size=200):
     os.makedirs(SAVE_DIR, exist_ok=True)
 
@@ -139,7 +142,7 @@ def build_model_viewer_html(save_folder, height=660, width=790, textured=False):
     </div>
     """
 
-
+
 def _gen_shape(
     caption=None,
     image=None,
@@ -246,7 +249,7 @@ def _gen_shape(
     main_image = image if not MV_MODE else image['front']
     return mesh, main_image, save_folder, stats, seed
 
-
+
 def generation_all(
     caption=None,
     image=None,
@@ -301,7 +304,8 @@ def generation_all(
     path_textured = export_mesh(textured_mesh, save_folder, textured=True)
     model_viewer_html_textured = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH,
                                                          textured=True)
-
+    if args.low_vram_mode:
+        torch.cuda.empty_cache()
     return (
         gr.update(value=path),
         gr.update(value=path_textured),
@@ -310,7 +314,7 @@ def generation_all(
         seed,
     )
 
-
+
 def shape_generation(
     caption=None,
     image=None,
@@ -347,7 +351,8 @@ def shape_generation(
 
     path = export_mesh(mesh, save_folder, textured=False)
     model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH)
-
+    if args.low_vram_mode:
+        torch.cuda.empty_cache()
     return (
         gr.update(value=path),
         model_viewer_html,
@@ -362,6 +367,8 @@ def build_app():
     title = 'Hunyuan3D-2mv: Image to 3D Generation with 1-4 Views'
     if 'mini' in args.subfolder:
         title = 'Hunyuan3D-2mini: Strong 0.6B Image to Shape Generator'
+    if TURBO_MODE:
+        title = title.replace(':', '-Turbo: Fast ')
 
     title_html = f"""
     <div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
@@ -386,11 +393,11 @@ def build_app():
     .mv-image button .wrap {
         font-size: 10px;
     }
-
+
     .mv-image .icon-wrap {
         width: 20px;
     }
-
+
     """
 
     with gr.Blocks(theme=gr.themes.Base(), title='Hunyuan-3D-2.0', analytics_enabled=False, css=custom_css) as demo:
@@ -430,7 +437,15 @@ def build_app():
                file_out = gr.File(label="File", visible=False)
                file_out2 = gr.File(label="File", visible=False)
 
-               with gr.Tabs(selected='tab_export'):
+               with gr.Tabs(selected='tab_options' if TURBO_MODE else 'tab_export'):
+                   with gr.Tab("Options", id='tab_options', visible=TURBO_MODE):
+                       gen_mode = gr.Radio(label='Generation Mode',
+                                           info='Recommendation: Turbo for most cases, Fast for very complex cases, Standard seldom use.',
+                                           choices=['Turbo', 'Fast', 'Standard'], value='Turbo')
+                       decode_mode = gr.Radio(label='Decoding Mode',
+                                              info='The resolution for exporting mesh from generated vectset',
+                                              choices=['Low', 'Standard', 'High'],
+                                              value='Standard')
                    with gr.Tab('Advanced Options', id='tab_advanced_options'):
                        with gr.Row():
                            check_box_rembg = gr.Checkbox(value=True, label='Remove Background', min_width=100)
@@ -446,14 +461,13 @@ def build_app():
                        with gr.Row():
                            num_steps = gr.Slider(maximum=100,
                                                  minimum=1,
-                                                 value=30,
+                                                 value=5 if 'turbo' in args.subfolder else 30,
                                                  step=1, label='Inference Steps')
                            octree_resolution = gr.Slider(maximum=512, minimum=16, value=256, label='Octree Resolution')
                        with gr.Row():
                            cfg_scale = gr.Number(value=5.0, label='Guidance Scale', min_width=100)
-                           num_chunks = gr.Slider(maximum=5000000, minimum=1000, value=
+                           num_chunks = gr.Slider(maximum=5000000, minimum=1000, value=8000,
                                                   label='Number of Chunks', min_width=100)
-
                    with gr.Tab("Export", id='tab_export'):
                        with gr.Row():
                            file_type = gr.Dropdown(label='File Type', choices=SUPPORTED_FORMATS,
@@ -573,6 +587,26 @@ def build_app():
            outputs=[tabs_output],
        )
 
+       def on_gen_mode_change(value):
+           if value == 'Turbo':
+               return gr.update(value=5)
+           elif value == 'Fast':
+               return gr.update(value=10)
+           else:
+               return gr.update(value=30)
+
+       gen_mode.change(on_gen_mode_change, inputs=[gen_mode], outputs=[num_steps])
+
+       def on_decode_mode_change(value):
+           if value == 'Low':
+               return gr.update(value=196)
+           elif value == 'Standard':
+               return gr.update(value=256)
+           else:
+               return gr.update(value=384)
+
+       decode_mode.change(on_decode_mode_change, inputs=[decode_mode], outputs=[octree_resolution])
+
        def on_export_click(file_out, file_out2, file_type, reduce_face, export_texture, target_face_num):
            if file_out is None:
                raise gr.Error('Please generate a mesh first.')
@@ -628,18 +662,22 @@ if __name__ == '__main__':
    parser.add_argument('--port', type=int, default=7860)
    parser.add_argument('--host', type=str, default='0.0.0.0')
    parser.add_argument('--device', type=str, default='cuda')
-   parser.add_argument('--mc_algo', type=str, default='
+   parser.add_argument('--mc_algo', type=str, default='mc')
    parser.add_argument('--cache-path', type=str, default='gradio_cache')
    parser.add_argument('--enable_t23d', action='store_true')
    parser.add_argument('--disable_tex', action='store_true')
+   parser.add_argument('--enable_flashvdm', action='store_true')
    parser.add_argument('--compile', action='store_true')
+   parser.add_argument('--low_vram_mode', action='store_true')
    args = parser.parse_args()
 
+   args.enable_flashvdm = True
    SAVE_DIR = args.cache_path
    os.makedirs(SAVE_DIR, exist_ok=True)
 
    CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
    MV_MODE = 'mv' in args.model_path
+   TURBO_MODE = 'turbo' in args.subfolder
 
    HTML_HEIGHT = 690 if MV_MODE else 650
    HTML_WIDTH = 500
@@ -662,14 +700,15 @@ if __name__ == '__main__':
    example_mvs = get_example_mv_list()
 
    SUPPORTED_FORMATS = ['glb', 'obj', 'ply', 'stl']
-
-   args.disable_tex = True
+
    HAS_TEXTUREGEN = False
    if not args.disable_tex:
        try:
            from hy3dgen.texgen import Hunyuan3DPaintPipeline
 
            texgen_worker = Hunyuan3DPaintPipeline.from_pretrained(args.texgen_model_path)
+           if args.low_vram_mode:
+               texgen_worker.enable_model_cpu_offload()
            # Not help much, ignore for now.
            # if args.compile:
            #     texgen_worker.models['delight_model'].pipeline.unet.compile()
@@ -699,9 +738,12 @@ if __name__ == '__main__':
    i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
        args.model_path,
        subfolder=args.subfolder,
-       use_safetensors=
+       use_safetensors=True,
        device=args.device,
    )
+   if args.enable_flashvdm:
+       mc_algo = 'mc' if args.device in ['cpu', 'mps'] else args.mc_algo
+       i23d_worker.enable_flashvdm(mc_algo=mc_algo)
    if args.compile:
        i23d_worker.compile()
 
@@ -718,6 +760,8 @@ if __name__ == '__main__':
    app.mount("/static", StaticFiles(directory=static_dir, html=True), name="static")
    shutil.copytree('./assets/env_maps', os.path.join(static_dir, 'env_maps'), dirs_exist_ok=True)
 
+   if args.low_vram_mode:
+       torch.cuda.empty_cache()
    demo = build_app()
    app = gr.mount_gradio_app(app, demo, path="/")
-   uvicorn.run(app, host=args.host, port=args.port)
+   uvicorn.run(app, host=args.host, port=args.port, workers=1)
hy3dgen/shapegen/__init__.py
CHANGED
@@ -13,5 +13,5 @@
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
 from .pipelines import Hunyuan3DDiTPipeline, Hunyuan3DDiTFlowMatchingPipeline
-from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover
+from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover
 from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR
hy3dgen/shapegen/models/__init__.py
CHANGED
@@ -25,4 +25,4 @@
 
 from .autoencoders import ShapeVAE
 from .conditioner import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder
-from .denoisers import Hunyuan3DDiT
+from .denoisers import HunYuanDiTPlain, Hunyuan3DDiT
hy3dgen/shapegen/models/conditioner.py
CHANGED
@@ -22,7 +22,6 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-import numpy as np
 import torch
 import torch.nn as nn
 from torchvision import transforms
@@ -34,26 +33,6 @@ from transformers import (
 )
 
 
-def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
-    """
-    embed_dim: output dimension for each position
-    pos: a list of positions to be encoded: size (M,)
-    out: (M, D)
-    """
-    assert embed_dim % 2 == 0
-    omega = np.arange(embed_dim // 2, dtype=np.float64)
-    omega /= embed_dim / 2.
-    omega = 1. / 10000 ** omega  # (D/2,)
-
-    pos = pos.reshape(-1)  # (M,)
-    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product
-
-    emb_sin = np.sin(out)  # (M, D/2)
-    emb_cos = np.cos(out)  # (M, D/2)
-
-    return np.concatenate([emb_sin, emb_cos], axis=1)
-
-
 class ImageEncoder(nn.Module):
     def __init__(
         self,
@@ -88,7 +67,7 @@ class ImageEncoder(nn.Module):
             ]
         )
 
-    def forward(self, image, mask=None, value_range=(-1, 1)
+    def forward(self, image, mask=None, value_range=(-1, 1)):
         if value_range is not None:
             low, high = value_range
             image = (image - low) / (high - low)
@@ -103,7 +82,7 @@ class ImageEncoder(nn.Module):
 
         return last_hidden_state
 
-    def unconditional_embedding(self, batch_size
+    def unconditional_embedding(self, batch_size):
         device = next(self.model.parameters()).device
         dtype = next(self.model.parameters()).dtype
         zero = torch.zeros(
@@ -131,82 +110,11 @@ class DinoImageEncoder(ImageEncoder):
     std = [0.229, 0.224, 0.225]
 
 
-class DinoImageEncoderMV(DinoImageEncoder):
-    def __init__(
-        self,
-        version=None,
-        config=None,
-        use_cls_token=True,
-        image_size=224,
-        view_num=4,
-        **kwargs,
-    ):
-        super().__init__(version, config, use_cls_token, image_size, **kwargs)
-        self.view_num = view_num
-        self.num_patches = self.num_patches
-        pos = np.arange(self.view_num, dtype=np.float32)
-        view_embedding = torch.from_numpy(
-            get_1d_sincos_pos_embed_from_grid(self.model.config.hidden_size, pos)).float()
-
-        view_embedding = view_embedding.unsqueeze(1).repeat(1, self.num_patches, 1)
-        self.view_embed = view_embedding.unsqueeze(0)
-
-    def forward(self, image, mask=None, value_range=(-1, 1), view_idxs=None):
-        if value_range is not None:
-            low, high = value_range
-            image = (image - low) / (high - low)
-
-        image = image.to(self.model.device, dtype=self.model.dtype)
-
-        bs, num_views, c, h, w = image.shape
-        image = image.view(bs * num_views, c, h, w)
-
-        inputs = self.transform(image)
-        outputs = self.model(inputs)
-
-        last_hidden_state = outputs.last_hidden_state
-        last_hidden_state = last_hidden_state.view(
-            bs, num_views, last_hidden_state.shape[-2],
-            last_hidden_state.shape[-1]
-        )
-
-        view_embedding = self.view_embed.to(last_hidden_state.dtype).to(last_hidden_state.device)
-        if view_idxs is not None:
-            assert len(view_idxs) == bs
-            view_embeddings = []
-            for i in range(bs):
-                view_idx = view_idxs[i]
-                assert num_views == len(view_idx)
-                view_embeddings.append(self.view_embed[:, view_idx, ...])
-            view_embedding = torch.cat(view_embeddings, 0).to(last_hidden_state.dtype).to(last_hidden_state.device)
-
-        if num_views != self.view_num:
-            view_embedding = view_embedding[:, :num_views, ...]
-        last_hidden_state = last_hidden_state + view_embedding
-        last_hidden_state = last_hidden_state.view(bs, num_views * last_hidden_state.shape[-2],
-                                                   last_hidden_state.shape[-1])
-        return last_hidden_state
-
-    def unconditional_embedding(self, batch_size, view_idxs=None, **kwargs):
-        device = next(self.model.parameters()).device
-        dtype = next(self.model.parameters()).dtype
-        zero = torch.zeros(
-            batch_size,
-            self.num_patches * len(view_idxs[0]),
-            self.model.config.hidden_size,
-            device=device,
-            dtype=dtype,
-        )
-        return zero
-
-
 def build_image_encoder(config):
     if config['type'] == 'CLIPImageEncoder':
         return CLIPImageEncoder(**config['kwargs'])
     elif config['type'] == 'DinoImageEncoder':
         return DinoImageEncoder(**config['kwargs'])
-    elif config['type'] == 'DinoImageEncoderMV':
-        return DinoImageEncoderMV(**config['kwargs'])
     else:
         raise ValueError(f'Unknown image encoder type: {config["type"]}')
 
@@ -221,17 +129,17 @@ class DualImageEncoder(nn.Module):
         self.main_image_encoder = build_image_encoder(main_image_encoder)
         self.additional_image_encoder = build_image_encoder(additional_image_encoder)
 
-    def forward(self, image, mask=None
+    def forward(self, image, mask=None):
         outputs = {
-            'main': self.main_image_encoder(image, mask=mask
-            'additional': self.additional_image_encoder(image, mask=mask
+            'main': self.main_image_encoder(image, mask=mask),
+            'additional': self.additional_image_encoder(image, mask=mask),
         }
         return outputs
 
-    def unconditional_embedding(self, batch_size
+    def unconditional_embedding(self, batch_size):
         outputs = {
-            'main': self.main_image_encoder.unconditional_embedding(batch_size
-            'additional': self.additional_image_encoder.unconditional_embedding(batch_size
+            'main': self.main_image_encoder.unconditional_embedding(batch_size),
+            'additional': self.additional_image_encoder.unconditional_embedding(batch_size),
         }
         return outputs
 
@@ -244,14 +152,14 @@ class SingleImageEncoder(nn.Module):
         super().__init__()
         self.main_image_encoder = build_image_encoder(main_image_encoder)
 
-    def forward(self, image, mask=None
+    def forward(self, image, mask=None):
         outputs = {
-            'main': self.main_image_encoder(image, mask=mask
+            'main': self.main_image_encoder(image, mask=mask),
        }
        return outputs
 
-    def unconditional_embedding(self, batch_size
+    def unconditional_embedding(self, batch_size):
        outputs = {
-            'main': self.main_image_encoder.unconditional_embedding(batch_size
+            'main': self.main_image_encoder.unconditional_embedding(batch_size),
        }
        return outputs
hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py
CHANGED
@@ -60,15 +60,6 @@ def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 10
     return embedding
 
 
-class GELU(nn.Module):
-    def __init__(self, approximate='tanh'):
-        super().__init__()
-        self.approximate = approximate
-
-    def forward(self, x: Tensor) -> Tensor:
-        return nn.functional.gelu(x.contiguous(), approximate=self.approximate)
-
-
 class MLPEmbedder(nn.Module):
     def __init__(self, in_dim: int, hidden_dim: int):
         super().__init__()
@@ -171,7 +162,7 @@ class DoubleStreamBlock(nn.Module):
         self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
         self.img_mlp = nn.Sequential(
             nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
-            GELU(approximate="tanh"),
+            nn.GELU(approximate="tanh"),
             nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
         )
 
@@ -182,7 +173,7 @@ class DoubleStreamBlock(nn.Module):
         self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
         self.txt_mlp = nn.Sequential(
             nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
-            GELU(approximate="tanh"),
+            nn.GELU(approximate="tanh"),
             nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
         )
 
@@ -248,7 +239,7 @@ class SingleStreamBlock(nn.Module):
         self.hidden_size = hidden_size
         self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
 
-        self.mlp_act = GELU(approximate="tanh")
+        self.mlp_act = nn.GELU(approximate="tanh")
         self.modulation = Modulation(hidden_size, double=False)
 
     def forward(self, x: Tensor, vec: Tensor, pe: Tensor) -> Tensor:
hy3dgen/shapegen/pipelines.py
CHANGED
@@ -24,12 +24,11 @@ import trimesh
 import yaml
 from PIL import Image
 from diffusers.utils.torch_utils import randn_tensor
-from diffusers.utils.import_utils import is_accelerate_version, is_accelerate_available
 from tqdm import tqdm
 
 from .models.autoencoders import ShapeVAE
 from .models.autoencoders import SurfaceExtractors
-from .utils import logger, synchronize_timer
+from .utils import logger, synchronize_timer
 
 
 def retrieve_timesteps(
@@ -128,9 +127,6 @@ def instantiate_from_config(config, **kwargs):
 
 
 class Hunyuan3DDiTPipeline:
-    model_cpu_offload_seq = "conditioner->model->vae"
-    _exclude_from_cpu_offload = []
-
     @classmethod
     @synchronize_timer('Hunyuan3DDiTPipeline Model Loading')
     def from_single_file(
@@ -211,12 +207,34 @@ class Hunyuan3DDiTPipeline:
             dtype=dtype,
             device=device,
         )
-
-
-
-
-
-        )
+        original_model_path = model_path
+        # try local path
+        base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
+        model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
+        logger.info(f'Try to load model from local path: {model_path}')
+        if not os.path.exists(model_path):
+            logger.info('Model path not exists, try to download from huggingface')
+            try:
+                import huggingface_hub
+                # download from huggingface
+                path = huggingface_hub.snapshot_download(repo_id=original_model_path)
+                model_path = os.path.join(path, subfolder)
+            except ImportError:
+                logger.warning(
+                    "You need to install HuggingFace Hub to load models from the hub."
+                )
+                raise RuntimeError(f"Model path {model_path} not found")
+            except Exception as e:
+                raise e
+
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Model path {original_model_path} not found")
+
+        extension = 'ckpt' if not use_safetensors else 'safetensors'
+        variant = '' if variant is None else f'.{variant}'
+        ckpt_name = f'model{variant}.{extension}'
+        config_path = os.path.join(model_path, 'config.yaml')
+        ckpt_path = os.path.join(model_path, ckpt_name)
         return cls.from_single_file(
             ckpt_path,
             config_path,
@@ -261,18 +279,12 @@ class Hunyuan3DDiTPipeline:
         if enabled:
             model_path = self.kwargs['from_pretrained_kwargs']['model_path']
             turbo_vae_mapping = {
-                'Hunyuan3D-2':
-                'Hunyuan3D-
-                'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini-turbo'),
+                'Hunyuan3D-2': 'hunyuan3d-vae-v2-0-turbo',
+                'Hunyuan3D-2s': 'hunyuan3d-vae-v2-s-turbo'
             }
             model_name = model_path.split('/')[-1]
             if replace_vae and model_name in turbo_vae_mapping:
-                model_path, subfolder
-                self.vae = ShapeVAE.from_pretrained(
-                    model_path, subfolder=subfolder,
-                    use_safetensors=self.kwargs['from_pretrained_kwargs']['use_safetensors'],
-                    device=self.device,
-                )
+                self.vae = ShapeVAE.from_pretrained(model_path, subfolder=turbo_vae_mapping[model_name])
             self.vae.enable_flashvdm_decoder(
                 enabled=enabled,
                 adaptive_kv_selection=adaptive_kv_selection,
@@ -282,146 +294,33 @@ class Hunyuan3DDiTPipeline:
         else:
             model_path = self.kwargs['from_pretrained_kwargs']['model_path']
             vae_mapping = {
-                'Hunyuan3D-2':
-                'Hunyuan3D-
-                'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini'),
+                'Hunyuan3D-2': 'hunyuan3d-vae-v2-0',
+                'Hunyuan3D-2s': 'hunyuan3d-vae-v2-s'
             }
             model_name = model_path.split('/')[-1]
             if model_name in vae_mapping:
-                model_path, subfolder
-                self.vae = ShapeVAE.from_pretrained(model_path, subfolder=subfolder)
+                self.vae = ShapeVAE.from_pretrained(model_path, subfolder=vae_mapping[model_name])
             self.vae.enable_flashvdm_decoder(enabled=False)
 
     def to(self, device=None, dtype=None):
-        if dtype is not None:
-            self.dtype = dtype
-            self.vae.to(dtype=dtype)
-            self.model.to(dtype=dtype)
-            self.conditioner.to(dtype=dtype)
         if device is not None:
             self.device = torch.device(device)
             self.vae.to(device)
             self.model.to(device)
             self.conditioner.to(device)
-
-    @property
-    def _execution_device(self):
-        r"""
-        Returns the device on which the pipeline's models will be executed. After calling
-        [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from
-        Accelerate's module hooks.
-        """
-        for name, model in self.components.items():
-            if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload:
-                continue
-
-            if not hasattr(model, "_hf_hook"):
-                return self.device
-            for module in model.modules():
-                if (
-                    hasattr(module, "_hf_hook")
-                    and hasattr(module._hf_hook, "execution_device")
-                    and module._hf_hook.execution_device is not None
-                ):
-                    return torch.device(module._hf_hook.execution_device)
-            return self.device
-
-    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-
-        Arguments:
-            gpu_id (`int`, *optional*):
-                The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
-            device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
-                The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
-                default to "cuda".
-        """
-        if self.model_cpu_offload_seq is None:
-            raise ValueError(
-                "Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set."
-            )
-
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        torch_device = torch.device(device)
-        device_index = torch_device.index
-
-        if gpu_id is not None and device_index is not None:
-            raise ValueError(
-                f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
-                f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}"
-            )
-
-        # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0
-        self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0)
-
-        device_type = torch_device.type
-        device = torch.device(f"{device_type}:{self._offload_gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu")
-            device_mod = getattr(torch, self.device.type, None)
-            if hasattr(device_mod, "empty_cache") and device_mod.is_available():
-                device_mod.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)}
-
-        self._all_hooks = []
-        hook = None
-        for model_str in self.model_cpu_offload_seq.split("->"):
-            model = all_model_components.pop(model_str, None)
-            if not isinstance(model, torch.nn.Module):
-                continue
-
-            _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook)
-            self._all_hooks.append(hook)
-
-        # CPU offload models that are not in the seq chain unless they are explicitly excluded
-        # these models will stay on CPU until maybe_free_model_hooks is called
-        # some models cannot be in the seq chain because they are iteratively called, such as controlnet
-        for name, model in all_model_components.items():
-            if not isinstance(model, torch.nn.Module):
-                continue
-
-            if name in self._exclude_from_cpu_offload:
-                model.to(device)
-            else:
-                _, hook = cpu_offload_with_hook(model, device)
-                self._all_hooks.append(hook)
-
-    def maybe_free_model_hooks(self):
-        r"""
-        Function that offloads all components, removes all model hooks that were added when using
-        `enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function
-        is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it
-        functions correctly when applying enable_model_cpu_offload.
-        """
-        if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0:
-            # `enable_model_cpu_offload` has not be called, so silently do nothing
-            return
-
-        for hook in self._all_hooks:
-            # offload model and remove hook from model
-            hook.offload()
-            hook.remove()
-
-        # make sure the model is in the same state as before calling it
-        self.enable_model_cpu_offload()
+        if dtype is not None:
+            self.dtype = dtype
+            self.vae.to(dtype=dtype)
+            self.model.to(dtype=dtype)
+            self.conditioner.to(dtype=dtype)
 
     @synchronize_timer('Encode cond')
-    def encode_cond(self, image,
+    def encode_cond(self, image, mask, do_classifier_free_guidance, dual_guidance):
         bsz = image.shape[0]
-        cond = self.conditioner(image=image,
+        cond = self.conditioner(image=image, mask=mask)
 
         if do_classifier_free_guidance:
-            un_cond = self.conditioner.unconditional_embedding(bsz
+            un_cond = self.conditioner.unconditional_embedding(bsz)
 
             if dual_guidance:
                 un_cond_drop_main = copy.deepcopy(un_cond)
@@ -437,6 +336,8 @@ class Hunyuan3DDiTPipeline:
 
             cond = cat_recursive(cond, un_cond_drop_main, un_cond)
         else:
+            un_cond = self.conditioner.unconditional_embedding(bsz, **additional_cond_inputs)
+
             def cat_recursive(a, b):
                 if isinstance(a, torch.Tensor):
                     return torch.cat([a, b], dim=0).to(self.dtype)
@@ -482,27 +383,25 @@ class Hunyuan3DDiTPipeline:
         latents = latents * getattr(self.scheduler, 'init_noise_sigma', 1.0)
         return latents
 
-    def prepare_image(self, image)
+    def prepare_image(self, image):
         if isinstance(image, str) and not os.path.exists(image):
             raise FileNotFoundError(f"Couldn't find image at path {image}")
 
         if not isinstance(image, list):
             image = [image]
-
-
+        image_pts = []
+        mask_pts = []
         for img in image:
-
-
-
-        cond_input = {k: [] for k in outputs[0].keys()}
-        for output in outputs:
-            for key, value in output.items():
-                cond_input[key].append(value)
-        for key, value in cond_input.items():
-            if isinstance(value[0], torch.Tensor):
-                cond_input[key] = torch.cat(value, dim=0)
+            image_pt, mask_pt = self.image_processor(img, return_mask=True)
+            image_pts.append(image_pt)
+            mask_pts.append(mask_pt)
 
-
+        image_pts = torch.cat(image_pts, dim=0).to(self.device, dtype=self.dtype)
+        if mask_pts[0] is not None:
+            mask_pts = torch.cat(mask_pts, dim=0).to(self.device, dtype=self.dtype)
+        else:
+            mask_pts = None
+        return image_pts, mask_pts
 
     def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
         """
@@ -575,14 +474,10 @@ class Hunyuan3DDiTPipeline:
             getattr(self.model, 'guidance_cond_proj_dim', None) is None
         dual_guidance = dual_guidance_scale >= 0 and dual_guidance
 
-
-
-
-
-            additional_cond_inputs=cond_inputs,
-            do_classifier_free_guidance=do_classifier_free_guidance,
-            dual_guidance=False,
-        )
+        image, mask = self.prepare_image(image)
+        cond = self.encode_cond(image=image,
+                                do_classifier_free_guidance=do_classifier_free_guidance,
+                                dual_guidance=dual_guidance)
         batch_size = image.shape[0]
 
         t_dtype = torch.long
@@ -640,17 +535,7 @@ class Hunyuan3DDiTPipeline:
             box_v, mc_level, num_chunks, octree_resolution, mc_algo,
         )
 
-    def _export(
-        self,
-        latents,
-        output_type='trimesh',
-        box_v=1.01,
-        mc_level=0.0,
-        num_chunks=20000,
-        octree_resolution=256,
-        mc_algo='mc',
-        enable_pbar=True
-    ):
+    def _export(self, latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo, enable_pbar=True):
         if not output_type == "latent":
             latents = 1. / self.vae.scale_factor * latents
             latents = self.vae(latents)
@@ -677,7 +562,7 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline):
     @torch.inference_mode()
     def __call__(
         self,
-        image: Union[str, List[str], Image.Image
+        image: Union[str, List[str], Image.Image] = None,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
         sigmas: List[float] = None,
@@ -705,11 +590,10 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline):
             self.model.guidance_embed is True
         )
 
-
-        image = cond_inputs.pop('image')
+        image, mask = self.prepare_image(image)
         cond = self.encode_cond(
             image=image,
-
+            mask=mask,
             do_classifier_free_guidance=do_classifier_free_guidance,
             dual_guidance=False,
         )
hy3dgen/shapegen/postprocessors.py
CHANGED
@@ -12,16 +12,13 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-import os
 import tempfile
 from typing import Union
 
-import numpy as np
 import pymeshlab
-import torch
 import trimesh
 
-from .models.
+from .models.vae import Latent2MeshOutput
 from .utils import synchronize_timer
 
 
hy3dgen/shapegen/preprocessors.py
CHANGED
@@ -87,7 +87,9 @@ class ImageProcessorV2:
         mask = mask.clip(0, 255).astype(np.uint8)
         return result, mask
 
-    def
+    def __call__(self, image, border_ratio=0.15, to_tensor=True, return_mask=False, **kwargs):
+        if self.border_ratio is not None:
+            border_ratio = self.border_ratio
         if isinstance(image, str):
             image = cv2.imread(image, cv2.IMREAD_UNCHANGED)
         image, mask = self.recenter(image, border_ratio=border_ratio)
@@ -104,64 +106,13 @@ class ImageProcessorV2:
         if to_tensor:
             image = array_to_tensor(image)
             mask = array_to_tensor(mask)
-
-
-
-        if self.border_ratio is not None:
-            border_ratio = self.border_ratio
-        image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor)
-        outputs = {
-            'image': image,
-            'mask': mask
-        }
-        return outputs
-
-
-class MVImageProcessorV2(ImageProcessorV2):
-    """
-    view order: front, front clockwise 90, back, front clockwise 270
-    """
-    return_view_idx = True
-
-    def __init__(self, size=512, border_ratio=None):
-        super().__init__(size, border_ratio)
-        self.view2idx = {
-            'front': 0,
-            'left': 1,
-            'back': 2,
-            'right': 3
-        }
-
-    def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kwargs):
-        if self.border_ratio is not None:
-            border_ratio = self.border_ratio
-
-        images = []
-        masks = []
-        view_idxs = []
-        for idx, (view_tag, image) in enumerate(image_dict.items()):
-            view_idxs.append(self.view2idx[view_tag])
-            image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor)
-            images.append(image)
-            masks.append(mask)
-
-        zipped_lists = zip(view_idxs, images, masks)
-        sorted_zipped_lists = sorted(zipped_lists)
-        view_idxs, images, masks = zip(*sorted_zipped_lists)
-
-        image = torch.cat(images, 0).unsqueeze(0)
-        mask = torch.cat(masks, 0).unsqueeze(0)
-        outputs = {
-            'image': image,
-            'mask': mask,
-            'view_idxs': view_idxs
-        }
-        return outputs
+        if return_mask:
+            return image, mask
+        return image
 
 
 IMAGE_PROCESSORS = {
     "v2": ImageProcessorV2,
-    'mv_v2': MVImageProcessorV2,
 }
 
 DEFAULT_IMAGEPROCESSOR = 'v2'