Spaces:

segments
/

panoptic-segment-anything

Runtime error

App Files Files Community

Tobias Cornille committed on Apr 18, 2023

Commit

94040eb

1 Parent(s): d197a83

Add Segments.ai output to Gradio

Browse files

Files changed (1) hide show

app.py +88 -36

app.py CHANGED Viewed

@@ -18,21 +18,21 @@ if not os.path.exists("./sam_vit_h_4b8939.pth"):
     )
     print(f"wget sam_vit_h_4b8939.pth result = {result}")
-import gradio as gr
 import argparse
 import random
 import warnings
 import numpy as np
-import matplotlib.pyplot as plt
 import torch
 from torch import nn
 import torch.nn.functional as F
 from scipy import ndimage
 from PIL import Image
 from huggingface_hub import hf_hub_download
-from segments.export import colorize
 from segments.utils import bitmap2file
 # Grounding DINO
@@ -262,6 +262,28 @@ def sam_mask_from_points(predictor, image_array, points):
     return upsampled_pred
 def generate_panoptic_mask(
     image,
     thing_category_names_string,
@@ -271,26 +293,44 @@ def generate_panoptic_mask(
     segmentation_background_threshold=0.1,
     shrink_kernel_size=20,
     num_samples_factor=1000,
 ):
-    # parse inputs
-    thing_category_names = [
-        thing_category_name.strip()
-        for thing_category_name in thing_category_names_string.split(",")
-    ]
-    stuff_category_names = [
-        stuff_category_name.strip()
-        for stuff_category_name in stuff_category_names_string.split(",")
-    ]
-    category_names = thing_category_names + stuff_category_names
-    category_name_to_id = {
-        category_name: i for i, category_name in enumerate(category_names)
-    }
     image = image.convert("RGB")
     image_array = np.asarray(image)
     # detect boxes for "thing" categories using Grounding DINO
-    thing_boxes, category_ids = dino_detection(
         dino_model,
         image,
         image_array,
@@ -360,14 +400,21 @@ def generate_panoptic_mask(
     panoptic_names = (
         ["background"]
         + stuff_category_names
-        + [category_names[category_id] for category_id in category_ids]
     )
     subsection_label_pairs = [
         (panoptic_bool_masks[i], panoptic_name)
         for i, panoptic_name in enumerate(panoptic_names)
     ]
-    return (image_array, subsection_label_pairs)
 config_file = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
@@ -465,9 +512,27 @@ if __name__ == "__main__":
                             value=1000,
                             step=1,
                         )
                 with gr.Column():
                     annotated_image = gr.AnnotatedImage()
             examples = gr.Examples(
                 examples=[
@@ -475,21 +540,11 @@ if __name__ == "__main__":
                         "a2d2.png",
                         "car, bus, person",
                         "road, sky, buildings, sidewalk",
-                        0.3,
-                        0.25,
-                        0.1,
-                        20,
-                        1000,
                     ],
                     [
                         "bxl.png",
                         "car, tram, motorcycle, person",
                         "road, buildings, sky",
-                        0.3,
-                        0.25,
-                        0.1,
-                        20,
-                        1000,
                     ],
                 ],
                 fn=generate_panoptic_mask,
@@ -497,13 +552,8 @@ if __name__ == "__main__":
                     input_image,
                     thing_category_names_string,
                     stuff_category_names_string,
-                    box_threshold,
-                    text_threshold,
-                    segmentation_background_threshold,
-                    shrink_kernel_size,
-                    num_samples_factor,
                 ],
-                outputs=[annotated_image],
                 cache_examples=True,
             )
@@ -518,8 +568,10 @@ if __name__ == "__main__":
                 segmentation_background_threshold,
                 shrink_kernel_size,
                 num_samples_factor,
             ],
-            outputs=[annotated_image],
         )
     block.launch(server_name="0.0.0.0", debug=args.debug, share=args.share)

     )
     print(f"wget sam_vit_h_4b8939.pth result = {result}")
 import argparse
 import random
 import warnings
+import json
+import tempfile
+import gradio as gr
 import numpy as np
 import torch
 from torch import nn
 import torch.nn.functional as F
 from scipy import ndimage
 from PIL import Image
 from huggingface_hub import hf_hub_download
 from segments.utils import bitmap2file
 # Grounding DINO
     return upsampled_pred
+def inds_to_segments_format(
+    panoptic_inds, thing_category_ids, stuff_category_ids, output_file
+):
+    panoptic_inds_array = panoptic_inds.numpy().astype(np.uint32)
+    bitmap_file = bitmap2file(panoptic_inds_array, is_segmentation_bitmap=True)
+    output_file.write(bitmap_file)
+    unique_inds = np.unique(panoptic_inds_array)
+    stuff_annotations = [
+        {"id": i + 1, "category_id": stuff_category_id}
+        for i, stuff_category_id in enumerate(stuff_category_ids)
+        if i in unique_inds
+    ]
+    thing_annotations = [
+        {"id": len(stuff_category_ids) + 1 + i, "category_id": thing_category_id}
+        for i, thing_category_id in enumerate(thing_category_ids)
+    ]
+    annotations = stuff_annotations + thing_annotations
+    return annotations
 def generate_panoptic_mask(
     image,
     thing_category_names_string,
     segmentation_background_threshold=0.1,
     shrink_kernel_size=20,
     num_samples_factor=1000,
+    task_attributes_json=None,
 ):
+    if task_attributes_json is not None:
+        task_attributes = json.loads(task_attributes_json)
+        categories = task_attributes["categories"]
+        category_name_to_id = {
+            category["name"]: category["id"] for category in categories
+        }
+        # split the categories into "stuff" categories (regions w/o instances)
+        # and "thing" categories (objects/instances)
+        stuff_categories = [
+            category for category in categories if not category["has_instances"]
+        ]
+        thing_categories = [
+            category for category in categories if category["has_instances"]
+        ]
+        stuff_category_names = [category["name"] for category in stuff_categories]
+        thing_category_names = [category["name"] for category in thing_categories]
+    else:
+        # parse inputs
+        thing_category_names = [
+            thing_category_name.strip()
+            for thing_category_name in thing_category_names_string.split(",")
+        ]
+        stuff_category_names = [
+            stuff_category_name.strip()
+            for stuff_category_name in stuff_category_names_string.split(",")
+        ]
+        category_names = thing_category_names + stuff_category_names
+        category_name_to_id = {
+            category_name: i for i, category_name in enumerate(category_names)
+        }
     image = image.convert("RGB")
     image_array = np.asarray(image)
     # detect boxes for "thing" categories using Grounding DINO
+    thing_boxes, thing_category_ids = dino_detection(
         dino_model,
         image,
         image_array,
     panoptic_names = (
         ["background"]
         + stuff_category_names
+        + [category_names[category_id] for category_id in thing_category_ids]
     )
     subsection_label_pairs = [
         (panoptic_bool_masks[i], panoptic_name)
         for i, panoptic_name in enumerate(panoptic_names)
     ]
+    temp_file = tempfile.NamedTemporaryFile(suffix=".png")
+    stuff_category_ids = [category_name_to_id[name] for name in stuff_category_names]
+    annotations = inds_to_segments_format(
+        panoptic_inds, thing_category_ids, stuff_category_ids, temp_file
+    )
+    annotations_json = json.dumps(annotations)
+    return (image_array, subsection_label_pairs), temp_file.name, annotations_json
 config_file = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
                             value=1000,
                             step=1,
                         )
+                        task_attributes_json = gr.Textbox(
+                            label="Task attributes JSON",
+                        )
                 with gr.Column():
                     annotated_image = gr.AnnotatedImage()
+                    with gr.Accordion("Segmentation bitmap", open=False):
+                        segmentation_bitmap_text = gr.Markdown(
+                            """
+The segmentation bitmap is a 32-bit RGBA png image which contains the segmentation masks.
+The alpha channel is set to 255, and the remaining 24-bit values in the RGB channels correspond to the object ids in the annotations list.
+Unlabeled regions have a value of 0.
+Because of the large dynamic range, these png images may appear black in an image viewer.
+"""
+                        )
+                        segmentation_bitmap = gr.Image(
+                            type="filepath", label="Segmentation bitmap"
+                        )
+                        annotations_json = gr.Textbox(
+                            label="Annotations JSON",
+                        )
             examples = gr.Examples(
                 examples=[
                         "a2d2.png",
                         "car, bus, person",
                         "road, sky, buildings, sidewalk",
                     ],
                     [
                         "bxl.png",
                         "car, tram, motorcycle, person",
                         "road, buildings, sky",
                     ],
                 ],
                 fn=generate_panoptic_mask,
                     input_image,
                     thing_category_names_string,
                     stuff_category_names_string,
                 ],
+                outputs=[annotated_image, segmentation_bitmap, annotations_json],
                 cache_examples=True,
             )
                 segmentation_background_threshold,
                 shrink_kernel_size,
                 num_samples_factor,
+                task_attributes_json,
             ],
+            outputs=[annotated_image, segmentation_bitmap, annotations_json],
+            api_name="segment",
         )
     block.launch(server_name="0.0.0.0", debug=args.debug, share=args.share)