Spaces:

ZiyuG
/

SAM2Point

Running on Zero

App Files Files Community

ZiyuG commited on Aug 28, 2024

Commit

762c88c

verified ·

1 Parent(s): 028b124

Update app.py

Browse files

Files changed (1) hide show

app.py +390 -58

app.py CHANGED Viewed

@@ -1,61 +1,393 @@
 import gradio as gr
-from huggingface_hub import hf_hub_download, HfFolder
-from PIL import Image
-import requests, torch
 import numpy as np
-from io import BytesIO
 import plotly.graph_objects as go
-import os
-def load_ScanNet_sample(data_path):
-    all_data = torch.load(data_path)
-    point = np.array(all_data['coord'])
-    color = np.array(all_data['color'])
-    point = point - point.min(axis=0)
-    point = point / point.max(axis=0)
-    color = color / 255.
-    return point, color
-def show_logo():
-    repo_id = "ZiyuG/Cache"
-    filename = "scene0000_00.pth"
-    token = os.getenv('HF_TOKEN')
-    print("token:", token)
-    try:
-        file_path = hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token, repo_type='dataset')
-        point, color = load_ScanNet_sample(file_path)
-        if point.shape[0] > 100000:
-            indices = np.random.choice(point.shape[0], 100000, replace=False)
-            point = point[indices]
-            color = color[indices]
-    except Exception as e:
-        print(e)
-        point = np.random.rand(8000, 3)
-        color = np.random.rand(8000, 3)
-    fig = go.Figure(
-        data=[
-            go.Scatter3d(
-                x=point[:,0], y=point[:,1], z=point[:,2],
-                mode='markers',
-                marker=dict(size=1, color=color, opacity=0.5),
-            )
-        ],
-        layout=dict(
-            scene=dict(
-                xaxis=dict(visible=False),
-                yaxis=dict(visible=False),
-                zaxis=dict(visible=False),
-                aspectratio=dict(x=1, y=1, z=1),
-                camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
-            )
-        )
-    )
-    return fig
-iface = gr.Interface(fn=show_logo, inputs=[], outputs=gr.Plot(), title="Display Logo")
-iface.launch(share=True)

+from pickle import FALSE
 import gradio as gr
 import numpy as np
 import plotly.graph_objects as go
+from sam2point import dataset
+import sam2point.configs as configs
+from demo_utils import run_demo, create_box
+# Sample data for dropdowns
+samples = {
+  "3D Indoor Scene - S3DIS": ["Conference Room", "Restroom", "Lobby", "Office1", "Office2"],
+#    "3D Indoor Scene - ScanNet": ["Scene1", "Scene2", "Scene3", "Scene4", "Scene5"],
+  "3D Indoor Scene - ScanNet": ["Scene1", "Scene2", "Scene3", "Scene4", "Scene5", "Scene6"],
+  "3D Outdoor Driving Scene - KITTI": ["Scene1", "Scene2", "Scene3", "Scene4", "Scene5", "Scene6"],
+  "3D Outdoor Street Scene - Semantic3D": ["Scene1", "Scene2", "Scene3", "Scene4", "Scene5", "Scene6", "Scene7"],
+  "3D Object - Objaverse": ["Plant", "Lego", "Lock", "Eleplant", "Knife Rest", "Skateboard", "Popcorn Machine", "Stove", "Bus Shelter", "Thor Hammer", "Horse"],
+#   "3D Object - Objaverse": ["Plant", "Eleplant", "Knife Rest", "Skateboard", "Popcorn Machine", "Stove", "Bus Shelter", "Thor Hammer", "Horse", "Dinner Booth"],
+}
+PATH = {
+   "S3DIS": ['Area_1_conferenceRoom_1.txt', 'Area_2_WC_1.txt', 'Area_4_lobby_2.txt', 'Area_5_office_3.txt', 'Area_6_office_9.txt'],
+   # "ScanNet": ['scene0001_01.pth', 'scene0005_01.pth', 'scene0010_01.pth', 'scene0016_02.pth', 'scene0019_01.pth'],
+   "ScanNet": ['scene0005_01.pth', 'scene0010_01.pth', 'scene0016_02.pth', 'scene0019_01.pth', 'scene0000_00.pth', 'scene0002_00.pth'],
+   "Objaverse": ["plant.npy", "human.npy", "lock.npy", "elephant.npy", "knife_rest.npy", "skateboard.npy", "popcorn_machine.npy", "stove.npy", "bus_shelter.npy", "thor_hammer.npy", "horse.npy"],
+#    "Objaverse": ["plant.npy", "elephant.npy", "knife_rest.npy", "skateboard.npy", "popcorn_machine.npy", "stove.npy", "bus_shelter.npy", "thor_hammer.npy", "horse.npy", "dinner_booth.npy"],
+   "KITTI": ["scene1.npy", "scene2.npy", "scene3.npy", "scene4.npy", "scene5.npy", "scene6.npy"],
+   "Semantic3D": ["scene1.npy", "scene2.npy", "patch19.npy", "patch0.npy", "patch1.npy", "patch50.npy", "patch62.npy"]
+}
+prompt_types = ["Point", "Box", "Mask"]
+# def select(name, sample_idx):
+#     DATASET = name.split('-')[1].replace(" ", "")
+#     gr.Info(f"Visualizing {DATASET} Example {str(sample_idx)}...")
+# Function to load and display 3D scene or object
+def load_3d_scene(name, sample_idx=-1, type_=None, prompt=None, final=False, new_color=None):
+   DATASET = name.split('-')[1].replace(" ", "")
+   path = 'data/' + DATASET + '/' + PATH[DATASET][sample_idx]
+   asp, SIZE = 1., 1
+   # load data
+   print(path)
+   if DATASET == 'S3DIS':
+       point, color = dataset.load_S3DIS_sample(path, sample=True)
+       alpha = 1
+   elif DATASET == 'ScanNet':
+       point, color = dataset.load_ScanNet_sample(path)
+       alpha = 1
+   elif DATASET == 'Objaverse':
+       point, color = dataset.load_Objaverse_sample(path)
+       alpha = 1
+       SIZE = 2
+   elif DATASET == 'KITTI':
+       point, color = dataset.load_KITTI_sample(path)
+       asp = 0.3
+       alpha = 0.7
+   elif DATASET == 'Semantic3D':
+       point, color = dataset.load_Semantic3D_sample(path, sample_idx, sample=True)
+       alpha = 0.2
+   print("Loading Dataset:", DATASET, ", Point Cloud Size:", point.shape)
+   ##### Initial Showing #####
+   if not type_:
+       if point.shape[0] > 100000:
+           indices = np.random.choice(point.shape[0], 100000, replace=False)
+           point = point[indices]
+           color = color[indices]
+    #    #NOTE KITTI
+    #    mask1 = point[:, 1] <= 0.8
+    #    mask4 = point[:, 1] >= 0.6
+    #    mask2 = point[:, 0] >= 0.3
+    #    mask3 = point[:, 0] <= 0.7
+    #    mask = mask1 & mask2 & mask3 & mask4
+    #    point = point[mask]
+    #    color = color[mask]
+    #    alpha = 1
+    #    ######
+       fig = go.Figure(
+           data=[
+               go.Scatter3d(
+                   x=point[:,0], y=point[:,1], z=point[:,2],
+                   mode='markers',
+                   marker=dict(size=SIZE, color=color, opacity=alpha),
+                   name=""
+               )
+           ],
+           layout=dict(
+               scene=dict(
+                   xaxis=dict(visible=False),
+                   yaxis=dict(visible=False),
+                   zaxis=dict(visible=False),
+                   aspectratio=dict(x=1, y=1, z=asp),
+                   camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
+               )
+           )
+       )
+       return fig
+   ##### Final
+   if final:
+       color = new_color
+       green = np.array([[0.1, 0.1, 0.1]])
+       add_green = go.Scatter3d(
+           x=green[:,0], y=green[:,1], z=green[:,2],
+           mode='markers',
+           marker=dict(size=0.0001, color='green', opacity=1),
+           name="Segmentation Results"
+       )
+   if type_ == "box":
+       if point.shape[0] > 100000:
+           indices = np.random.choice(point.shape[0], 100000, replace=False)
+           point = point[indices]
+           color = color[indices]
+    #    mask = point[:, 1] < 0.8
+    #    point = point[mask]
+    #    color = color[mask]
+    #    alpha = 1
+       scatter = go.Scatter3d(
+           x=point[:,0], y=point[:,1], z=point[:,2],
+           mode='markers',
+           marker=dict(size=SIZE, color=color, opacity=alpha),
+           name="3D Object/Scene"
+       )
+       if final: scatter = [scatter, add_green] + create_box(prompt)
+       else: scatter = [scatter] + create_box(prompt)
+   elif type_ == "point":
+       prompt = np.array([prompt])
+       new = go.Scatter3d(
+               x=prompt[:,0], y=prompt[:,1], z=prompt[:,2],
+               mode='markers',
+            #    marker=dict(size=5, color='red', opacity=1),
+            #    marker=dict(size=5, color='rgb(255, 140, 0)', opacity=1),
+               marker=dict(size=5, color='rgb(139, 0, 0)', opacity=1),
+               name="Point Prompt"
+       )
+       # print(point.shape, color.shape, new_color.shape)
+       if point.shape[0] > 100000:
+           indices = np.random.choice(point.shape[0], 100000, replace=False)
+           point = point[indices]
+           color = color[indices]
+    #    #NOTE KITTI
+    #    mask1 = point[:, 1] <= 0.8
+    #    mask = point[:, 1] >= 0.35  #2
+    #    < 0.63  #3
+    #    mask2 = point[:, 0] >= 0.3
+    #    mask3 = point[:, 0] <= 0.7
+    #    mask = mask1 & mask2 & mask3 & mask4
+    #    #NOTE S3DIS
+    #    if DATASET == 'S3DIS':
+    #     mask = point[:, 0] > 0.04
+    #    point = point[mask]
+    #    color = color[mask]
+    #     alpha = 1
+    #    ######
+       scatter = go.Scatter3d(
+           x=point[:,0], y=point[:,1], z=point[:,2],
+           mode='markers',
+           marker=dict(size=SIZE, color=color, opacity=alpha),
+           name="3D Object/Scene"
+       )
+       if final:   scatter = [scatter, new, add_green]
+       else:       scatter = [scatter, new]
+   elif type_ == 'mask' and not final:
+       color = np.clip(prompt * 255, 0, 255).astype(np.uint8)
+       if point.shape[0] > 100000:
+           indices = np.random.choice(point.shape[0], 100000, replace=False)
+           point = point[indices]
+           color = color[indices]
+       scatter = go.Scatter3d(
+           x=point[:,0], y=point[:,1], z=point[:,2],
+           mode='markers',
+           marker=dict(size=SIZE, color=color, opacity=alpha),
+           name="3D Object/Scene"
+       )
+       red = np.array([[0.1, 0.1, 0.1]])
+       add_red = go.Scatter3d(
+           x=red[:,0], y=red[:,1], z=red[:,2],
+           mode='markers',
+           marker=dict(size=0.0001, color='red', opacity=1),
+           name="Mask Prompt"
+       )
+       scatter = [scatter, add_red]
+   elif type_ == 'mask' and final:
+       if point.shape[0] > 100000:
+           indices = np.random.choice(point.shape[0], 100000, replace=False)
+           point = point[indices]
+           color = color[indices]
+    #    # cut
+    #    mask = point[:, 0] > 0.1
+    #    point = point[mask]
+    #    color = color[mask]
+    #    alpha = 1
+    #    ######
+       scatter = go.Scatter3d(
+           x=point[:,0], y=point[:,1], z=point[:,2],
+           mode='markers',
+           marker=dict(size=SIZE, color=color, opacity=alpha),
+           name="3D Object/Scene"
+       )
+       scatter = [scatter, add_green]
+       print(point.shape, color.shape)
+   else:
+       print("Wrong Prompt Type")
+       exit(1)
+   fig = go.Figure(
+       data=scatter,
+       layout=dict(
+           scene=dict(
+               xaxis=dict(visible=False),
+               yaxis=dict(visible=False),
+               zaxis=dict(visible=False),
+               aspectratio=dict(x=1, y=1, z=asp),
+               camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
+           )
+       )
+   )
+   return fig
+# Function to display prompt in 3D
+def show_prompt_in_3d(name, sample_idx, prompt_type, prompt_idx):
+   DATASET = name.split('-')[1].replace(" ", "")
+   TYPE = prompt_type.lower()
+   theta = 0. if DATASET in "S3DIS ScanNet" else 0.5
+   mode = "bilinear" if DATASET in "S3DIS ScanNet" else 'nearest'
+   prompt = run_demo(DATASET, TYPE, sample_idx, prompt_idx, 0.02, theta, mode, ret_prompt=True)
+   fig = load_3d_scene(name, sample_idx, TYPE, prompt)
+   return fig
+# Function to start segmentation
+def start_segmentation(name=None, sample_idx=None, prompt_type=None, prompt_idx=None, vx=0.02):
+   if name == None or sample_idx == None or prompt_type == None or prompt_idx == None:
+       return gr.Plot(), gr.Textbox(label="Response", value="Please ensure all options are selected.", visible=True)
+   DATASET = name.split('-')[1].replace(" ", "")
+   TYPE = prompt_type.lower()
+   theta = 0. if DATASET in "S3DIS ScanNet" else 0.5
+   mode = "bilinear" if DATASET in "S3DIS ScanNet" else 'nearest'
+   new_color, prompt = run_demo(DATASET, TYPE, sample_idx, prompt_idx, vx, theta, mode, ret_prompt=False)
+   fig = load_3d_scene(name, sample_idx, TYPE, prompt, final=True, new_color=new_color)
+   return fig, gr.Textbox(label="Response", value="Segmentation completed successfully!", visible=True)
+def update1(datasets):
+   if 'Objaverse' in datasets:
+       return gr.Radio(label="Select 3D Object", choices=samples[datasets]), gr.Textbox(label="Response", value="", visible=True) #, gr.Slider(minimum=0.01, maximum=0.15, step=0.001, label="Voxel Size", value=0.02)
+   return gr.Radio(label="Select 3D Scene", choices=samples[datasets]), gr.Textbox(label="Response", value="", visible=True) #, gr.Slider(minimum=0.01, maximum=0.15, step=0.001, label="Voxel Size", value=0.02)
+def update2(name, sample_idx, prompt_type):
+   if name == None or sample_idx == None or prompt_type == None:
+       return gr.Radio(label="Select Prompt Example", choices=[]), gr.Textbox(label="Response", value="", visible=True) #, gr.Slider(minimum=0.01, maximum=0.15, step=0.001, label="Voxel Size", value=0.02)
+   DATASET = name.split('-')[1].replace(" ", "")
+   TYPE = prompt_type.lower() + '_prompts'
+#    if DATASET in "ScanNet" and prompt_type == 'Mask':  TYPE = 'point_prompts'
+   if DATASET == 'S3DIS':
+       info = configs.S3DIS_samples[sample_idx][TYPE]
+   elif DATASET == 'ScanNet':
+       info = configs.ScanNet_samples[sample_idx][TYPE]
+   elif DATASET == 'Objaverse':
+       info = configs.Objaverse_samples[sample_idx][TYPE]
+   elif DATASET == 'KITTI':
+       info = configs.KITTI_samples[sample_idx][TYPE]
+   elif DATASET == 'Semantic3D':
+       info = configs.Semantic3D_samples[sample_idx][TYPE]
+   cur = ['Example ' + str(i) for i in range(1, len(info) + 1)]
+   return gr.Radio(label="Select Prompt Example", choices=cur), gr.Textbox(label="Response", value="", visible=True) #, gr.Slider(minimum=0.01, maximum=0.15, step=0.001, label="Voxel Size", value=0.02)
+def update3(name, sample_idx, prompt_type, prompt_idx):
+   if name == None or sample_idx == None or prompt_type == None:
+       return gr.Textbox(label="Response", value="", visible=True), gr.Slider(minimum=0.01, maximum=0.15, step=0.001, label="Voxel Size", value=0.02)
+   DATASET = name.split('-')[1].replace(" ", "")
+   TYPE = configs.VOXEL[prompt_type.lower()]
+   if DATASET in "S3DIS ScanNet":
+       vx_ = 0.02
+   elif DATASET == 'Objaverse':
+       vx_ = configs.Objaverse_samples[sample_idx][TYPE][prompt_idx]
+   elif DATASET == 'KITTI':
+       vx_ = configs.KITTI_samples[sample_idx][TYPE][prompt_idx]
+   elif DATASET == 'Semantic3D':
+       vx_ = configs.Semantic3D_samples[sample_idx][TYPE][prompt_idx]
+   return gr.Textbox(label="Response", value="", visible=True), gr.Slider(minimum=0.01, maximum=0.15, step=0.001, label="Voxel Size", value=vx_)
+def main():
+   title = """<h1 style="font-variant: small-caps; font-weight: bold; text-align: center;" align="center">SAM2Point</h1>
+   <h3 align="center"><b>Segment Any 3D as Videos in Zero-shot and Promptable Manners</h3>
+   <br>
+   """
+   title = """
+    <h1 style="text-align: center;">
+    <div style="width: 1.2em; height: 1.2em; display: inline-block;"><img src="https://github.com/ZiyuGuo99/ZiyuGuo99.github.io/blob/main/assets/img/logo.png?raw=true" style='width: 100%; height: 100%; object-fit: contain;' /></div>
+    <span style="font-variant: small-caps; font-weight: bold;">Sam2Point</span>
+    </h1>
+    <h3 align="center"><span style="font-variant: small-caps; ">Segment Any 3D as Videos in Zero-shot and Promptable Manners
+    </span></h3>"""
+   with gr.Blocks(
+       css="""
+       .contain { display: flex; flex-direction: column; }
+       .gradio-container { height: 100vh !important; }
+       #col_container { height: 100%; }
+       pre {
+       white-space: pre-wrap;       /* Since CSS 2.1 */
+       white-space: -moz-pre-wrap;  /* Mozilla, since 1999 */
+       white-space: -pre-wrap;      /* Opera 4-6 */
+       white-space: -o-pre-wrap;    /* Opera 7 */
+       word-wrap: break-word;       /* Internet Explorer 5.5+ */
+       }""",
+       js="""
+       function refresh() {
+           const url = new URL(window.location);
+           if (url.searchParams.get('__theme') !== 'light') {
+               url.searchParams.set('__theme', 'light');
+               window.location.href = url.href;
+           }
+       }""",
+       title="SAM2Point: Segment Any 3D as Videos in Zero-shot and Promptable Manners",
+       theme=gr.themes.Soft()
+   ) as app:
+       gr.HTML(title)
+       with gr.Row():
+           with gr.Column(elem_id="col_container"):
+               sample_dropdown = gr.Dropdown(label="Select 3D Data Type", choices=samples, type="value")
+               scene_dropdown = gr.Radio(label="Select 3D Object/Scene", choices=[], type="index")
+               show_button = gr.Button("Show 3D Scene/Object")
+               prompt_type_dropdown = gr.Radio(label="Select Prompt Type", choices=prompt_types)
+               prompt_sample_dropdown = gr.Radio(label="Select Prompt Example", choices=[], type="index")
+               show_prompt_button = gr.Button("Show Prompt in 3D Scene/Object")
+               # show_button.input(select, [sample_dropdown, scene_dropdown], [])
+           with gr.Column():
+               # vx = gr.Slider(minimum=0.01, maximum=0.15, step=0.001, label="Voxel Size", value=0.02)
+               start_segment_button = gr.Button("Start Segmentation")
+               plot1 = gr.Plot()
+       response = gr.Textbox(label="Response")
+       sample_dropdown.change(update1, sample_dropdown, [scene_dropdown, response])
+       sample_dropdown.change(update2, [sample_dropdown, scene_dropdown, prompt_type_dropdown], [prompt_sample_dropdown, response])
+       scene_dropdown.change(update2, [sample_dropdown, scene_dropdown, prompt_type_dropdown], [prompt_sample_dropdown, response])
+       prompt_type_dropdown.change(update2, [sample_dropdown, scene_dropdown, prompt_type_dropdown], [prompt_sample_dropdown, response])
+       # sample_dropdown.change(update1, sample_dropdown, [scene_dropdown, response, vx])
+       # sample_dropdown.change(update2, [sample_dropdown, scene_dropdown, prompt_type_dropdown], [prompt_sample_dropdown, response, vx])
+       # scene_dropdown.change(update2, [sample_dropdown, scene_dropdown, prompt_type_dropdown], [prompt_sample_dropdown, response, vx])
+       # prompt_type_dropdown.change(update2, [sample_dropdown, scene_dropdown, prompt_type_dropdown], [prompt_sample_dropdown, response, vx])
+       # prompt_sample_dropdown.change(update3, [sample_dropdown, scene_dropdown, prompt_type_dropdown, prompt_sample_dropdown], [response, vx])
+       # Logic to handle interactions
+       show_button.click(load_3d_scene, inputs=[sample_dropdown, scene_dropdown], outputs=plot1)
+       show_prompt_button.click(show_prompt_in_3d, inputs=[sample_dropdown, scene_dropdown, prompt_type_dropdown, prompt_sample_dropdown], outputs=plot1)
+       # start_segment_button.click(start_segmentation, inputs=[sample_dropdown, scene_dropdown, prompt_type_dropdown, prompt_sample_dropdown, vx], outputs=[plot1, response])
+       start_segment_button.click(start_segmentation, inputs=[sample_dropdown, scene_dropdown, prompt_type_dropdown, prompt_sample_dropdown], outputs=[plot1, response])
+   app.queue(status_update_rate="auto")
+   app.launch(share=True, favicon_path="./logo.png")
+if __name__ == "__main__":
+   main()