Spaces:

Viglong
/

Orient-Anything-V2

Running on CPU Upgrade

App Files Files Community

zhangziang committed 24 days ago

Commit

a1a407f

1 Parent(s): 9868529

Update layout, model, and examples

Browse files

Files changed (12) hide show

app.py +77 -41
assets/{axis_ref.png → examples/F35-0.jpg} +2 -2
assets/{axis_tgt.png → examples/F35-1.jpg} +2 -2
assets/examples/bottle.jpg +3 -0
assets/examples/hat.jpg +3 -0
assets/examples/skateboard-0.jpg +3 -0
assets/examples/skateboard-1.jpg +3 -0
assets/examples/table-0.jpg +3 -0
assets/examples/table-1.jpg +3 -0
inference.py +3 -3
orianyV2_demo.ipynb +0 -0
paths.py +1 -4

app.py CHANGED Viewed

@@ -2,8 +2,9 @@ import gradio as gr
 import numpy as np
 from PIL import Image
 import torch
-# ====== 你的原有导入和模型加载保持不变 ======
 from paths import *
 from vision_tower import VGGT_OriAny_Ref
 from inference import *
@@ -81,37 +82,52 @@ def run_inference(image_ref, image_tgt, do_rm_bkg):
     ro = safe_float(ans_dict.get('ref_ro_pred', 0))
     alpha = int(ans_dict.get('ref_alpha_pred', 1))  # 注意：target 默认 alpha=1，但 ref 可能不是
-    # ===== 渲染参考图的坐标轴 =====
-    axis_renderer.render_axis(az, el, ro, alpha, save_path=REF_AXIS_IMAGE)
-    axis_ref = Image.open(REF_AXIS_IMAGE).convert("RGBA")
-    # 叠加坐标轴到参考图
-    # 确保尺寸一致
-    if axis_ref.size != pil_ref.size:
-        axis_ref = axis_ref.resize(pil_ref.size, Image.LANCZOS)
-    pil_ref_rgba = pil_ref.convert("RGBA")
-    overlaid_ref = Image.alpha_composite(pil_ref_rgba, axis_ref).convert("RGB")
-    # ===== 处理目标图（如果有）=====
-    if pil_tgt is not None:
-        rel_az = safe_float(ans_dict.get('rel_az_pred', 0))
-        rel_el = safe_float(ans_dict.get('rel_el_pred', 0))
-        rel_ro = safe_float(ans_dict.get('rel_ro_pred', 0))
-        tgt_azi, tgt_ele, tgt_rot = Get_target_azi_ele_rot(az, el, ro, rel_az, rel_el, rel_ro)
-        print("Target: Azi",tgt_azi,"Ele",tgt_ele,"Rot",tgt_rot)
-        # target 默认 alpha=1（根据你的说明）
-        axis_renderer.render_axis(tgt_azi, tgt_ele, tgt_rot, alpha=1, save_path=TGT_AXIS_IMAGE)
-        axis_tgt = Image.open(TGT_AXIS_IMAGE).convert("RGBA")
-        if axis_tgt.size != pil_tgt.size:
-            axis_tgt = axis_tgt.resize(pil_tgt.size, Image.LANCZOS)
-        pil_tgt_rgba = pil_tgt.convert("RGBA")
-        overlaid_tgt = Image.alpha_composite(pil_tgt_rgba, axis_tgt).convert("RGB")
-    else:
-        overlaid_tgt = None
-        rel_az = rel_el = rel_ro = 0.0
     return [
         overlaid_ref,  # 渲染+叠加后的参考图
@@ -153,6 +169,26 @@ with gr.Blocks(title="Orient-Anything Demo") as demo:
                 )
             rm_bkg = gr.Checkbox(label="Remove Background", value=True)
             run_btn = gr.Button("Run Inference", variant="primary")
         # 右侧：结果图像 + 文本输出
         with gr.Column():
@@ -175,17 +211,17 @@ with gr.Blocks(title="Orient-Anything Demo") as demo:
             # 文本输出放在图像下方
             with gr.Row():
-                with gr.Column(scale=1):
                     gr.Markdown("### Absolute Pose (Reference)")
-                    az_out = gr.Textbox(label="Azimuth (0~360°)",scale=0.5)
-                    el_out = gr.Textbox(label="Polar (-90~90°)",scale=0.5)
-                    ro_out = gr.Textbox(label="Rotation (-90~90°)",scale=0.5)
-                    alpha_out = gr.Textbox(label="Number of Directions (0/1/2/4)",scale=0.5)
-                with gr.Column(scale=1):
                     gr.Markdown("### Relative Pose (Target w.r.t Reference)")
-                    rel_az_out = gr.Textbox(label="Relative Azimuth (0~360°)",scale=0.5)
-                    rel_el_out = gr.Textbox(label="Relative Polar (-90~90°)",scale=0.5)
-                    rel_ro_out = gr.Textbox(label="Relative Rotation (-90~90°)",scale=0.5)
     # 绑定点击事件
     run_btn.click(

 import numpy as np
 from PIL import Image
 import torch
+import os
+import tempfile
 from paths import *
 from vision_tower import VGGT_OriAny_Ref
 from inference import *
     ro = safe_float(ans_dict.get('ref_ro_pred', 0))
     alpha = int(ans_dict.get('ref_alpha_pred', 1))  # 注意：target 默认 alpha=1，但 ref 可能不是
+    # ===== 用临时文件保存渲染结果 =====
+    tmp_ref = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+    tmp_tgt = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+    tmp_ref.close()
+    tmp_tgt.close()
+    try:
+        # ===== 渲染参考图的坐标轴 =====
+        axis_renderer.render_axis(az, el, ro, alpha, save_path=tmp_ref.name)
+        axis_ref = Image.open(tmp_ref.name).convert("RGBA")
+        # 叠加坐标轴到参考图
+        # 确保尺寸一致
+        if axis_ref.size != pil_ref.size:
+            pil_ref = pil_ref.resize(axis_ref.size, Image.BICUBIC)
+        pil_ref_rgba = pil_ref.convert("RGBA")
+        overlaid_ref = Image.alpha_composite(pil_ref_rgba, axis_ref).convert("RGB")
+        # ===== 处理目标图（如果有）=====
+        if pil_tgt is not None:
+            rel_az = safe_float(ans_dict.get('rel_az_pred', 0))
+            rel_el = safe_float(ans_dict.get('rel_el_pred', 0))
+            rel_ro = safe_float(ans_dict.get('rel_ro_pred', 0))
+            tgt_azi, tgt_ele, tgt_rot = Get_target_azi_ele_rot(az, el, ro, rel_az, rel_el, rel_ro)
+            print("Target: Azi",tgt_azi,"Ele",tgt_ele,"Rot",tgt_rot)
+            # target 默认 alpha=1（根据你的说明）
+            axis_renderer.render_axis(tgt_azi, tgt_ele, tgt_rot, alpha=1, save_path=tmp_tgt.name)
+            axis_tgt = Image.open(tmp_tgt.name).convert("RGBA")
+            if axis_tgt.size != pil_tgt.size:
+                pil_tgt = pil_tgt.resize(axis_tgt.size, Image.BICUBIC)
+            pil_tgt_rgba = pil_tgt.convert("RGBA")
+            overlaid_tgt = Image.alpha_composite(pil_tgt_rgba, axis_tgt).convert("RGB")
+        else:
+            overlaid_tgt = None
+            rel_az = rel_el = rel_ro = 0.0
+    finally:
+        # 安全删除临时文件（即使出错也清理）
+        if os.path.exists(tmp_ref.name):
+            os.remove(tmp_ref.name)
+            print('cleaned {}'.format(tmp_ref.name))
+        if os.path.exists(tmp_tgt.name):
+            os.remove(tmp_tgt.name)
+            print('cleaned {}'.format(tmp_tgt.name))
     return [
         overlaid_ref,  # 渲染+叠加后的参考图
                 )
             rm_bkg = gr.Checkbox(label="Remove Background", value=True)
             run_btn = gr.Button("Run Inference", variant="primary")
+            # === 在这里插入示例 ===
+            with gr.Row():
+                gr.Examples(
+                    examples=[
+                        ["assets/examples/F35-0.jpg", "assets/examples/F35-1.jpg"],
+                        ["assets/examples/skateboard-0.jpg", "assets/examples/skateboard-1.jpg"],
+                    ],
+                    inputs=[ref_img, tgt_img],
+                    examples_per_page=2,
+                    label="Example Inputs (click to load)"
+                )
+                gr.Examples(
+                    examples=[
+                        ["assets/examples/table-0.jpg", "assets/examples/table-1.jpg"],
+                        ["assets/examples/bottle.jpg", None],
+                    ],
+                    inputs=[ref_img, tgt_img],
+                    examples_per_page=2,
+                    label=""
+                )
         # 右侧：结果图像 + 文本输出
         with gr.Column():
             # 文本输出放在图像下方
             with gr.Row():
+                with gr.Column():
                     gr.Markdown("### Absolute Pose (Reference)")
+                    az_out = gr.Textbox(label="Azimuth (0~360°)")
+                    el_out = gr.Textbox(label="Polar (-90~90°)")
+                    ro_out = gr.Textbox(label="Rotation (-90~90°)")
+                    alpha_out = gr.Textbox(label="Number of Directions (0/1/2/4)")
+                with gr.Column():
                     gr.Markdown("### Relative Pose (Target w.r.t Reference)")
+                    rel_az_out = gr.Textbox(label="Relative Azimuth (0~360°)")
+                    rel_el_out = gr.Textbox(label="Relative Polar (-90~90°)")
+                    rel_ro_out = gr.Textbox(label="Relative Rotation (-90~90°)")
     # 绑定点击事件
     run_btn.click(

assets/{axis_ref.png → examples/F35-0.jpg} RENAMED Viewed

File without changes

assets/{axis_tgt.png → examples/F35-1.jpg} RENAMED Viewed

File without changes

assets/examples/bottle.jpg ADDED Viewed

Git LFS Details

SHA256: a06e763afda918b0bbd5d5fe248e09b88bdeb72cd10c0343babf4eb14209a062
Pointer size: 130 Bytes
Size of remote file: 20.1 kB

assets/examples/hat.jpg ADDED Viewed

Git LFS Details

SHA256: 4fec959db7ee5ccad6cb20e74ffa68ef697bf0a1aa207ccc694b1faf29fccbcb
Pointer size: 131 Bytes
Size of remote file: 132 kB

assets/examples/skateboard-0.jpg ADDED Viewed

Git LFS Details

SHA256: 0f5783079542c8aec34b3ce041245f0e2050484c7fdabe7658cefa9fca05c078
Pointer size: 131 Bytes
Size of remote file: 258 kB

assets/examples/skateboard-1.jpg ADDED Viewed

Git LFS Details

SHA256: e88dbf8357d1d83960004e8a4723f5b9e4aee1a4eb4b3dc5384b75467019d576
Pointer size: 130 Bytes
Size of remote file: 65 kB

assets/examples/table-0.jpg ADDED Viewed

Git LFS Details

SHA256: 9eee2a881ea0be52bf5381b0948a0eea9bb173b4982dbd5e867b95b2368e7031
Pointer size: 129 Bytes
Size of remote file: 4.81 kB

assets/examples/table-1.jpg ADDED Viewed

Git LFS Details

SHA256: 590d5702941a56a3296f4c86cc7f8110ce8dae4b6103002ce0993cfb24b82489
Pointer size: 129 Bytes
Size of remote file: 4.6 kB

inference.py CHANGED Viewed

@@ -51,11 +51,11 @@ def val_fit_alpha(distribute):
         mu_fit, kappa_fit = saved_params[max_index]
         r_squared = saved_r_squared[max_index]
-        if alpha == 1. and kappa_fit>=0.5 and r_squared>=0.5:
             pass
-        elif alpha == 2. and kappa_fit>=0.35 and r_squared>=0.35:
             pass
-        elif alpha == 4. and kappa_fit>=0.25 and r_squared>=0.25:
             pass
         else:
             alpha=0.

         mu_fit, kappa_fit = saved_params[max_index]
         r_squared = saved_r_squared[max_index]
+        if alpha == 1. and kappa_fit>=0.6 and r_squared>=0.45:
             pass
+        elif alpha == 2. and kappa_fit>=0.4 and r_squared>=0.45:
             pass
+        elif alpha == 4. and kappa_fit>=0.25 and r_squared>=0.45:
             pass
         else:
             alpha=0.

orianyV2_demo.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

paths.py CHANGED Viewed

@@ -7,10 +7,7 @@ VGGT_1B     = "facebook/VGGT-1B"
 ORIANY_V2 = "Viglong/OriAnyV2_ckpt"
-REMOTE_CKPT_PATH = "demo_ckpts/acc8mask20lowlr.pt"
 RENDER_FILE = "assets/axis_render.blend"
-REF_AXIS_IMAGE = "assets/axis_ref.png"
-TGT_AXIS_IMAGE = "assets/axis_tgt.png"

 ORIANY_V2 = "Viglong/OriAnyV2_ckpt"
+REMOTE_CKPT_PATH = "demo_ckpts/rotmod_realrotaug_best.pt"
 RENDER_FILE = "assets/axis_render.blend"