Commit 0c14216
Parent(s): 69cd139
Add support for SigLIP-trained weights.
Same network structure for now; this is just to make it easier to compare the two while experimenting.
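For context, a minimal sketch of how the two weight sets can be compared through the same entry point after this change. `Predictor` and the `predict` argument order are taken from the diff below; the argument values are placeholders, and the trailing `api_key` parameter is an assumption (the diff truncates `predict`'s parameter list after `api_username`, but `danbooru_id_to_url` takes both).

predictor = Predictor()

for variant in ("CLIP", "SigLIP"):
    # Passing a different variant makes predict() call load_params(),
    # which swaps the corresponding msgpack weights into the same
    # CLIP() network structure before embedding.
    results = predictor.predict(
        "examples/46657164_p1.jpg",  # pos_img_input
        None,                        # neg_img_input
        "marcille_donato",           # positive_tags
        "",                          # negative_tags
        variant,                     # selected_model
        ["General", "Sensitive"],    # selected_ratings
        5,                           # n_neighbours
        "",                          # api_username
        "",                          # api_key (assumed)
    )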
- app.py +40 -10
- data/wd-v1-4-convnext-tagger-v2/siglip.msgpack +3 -0
app.py
CHANGED
@@ -14,10 +14,13 @@ from Models.CLIP import CLIP
 
 def combine_embeddings(pos_img_embs, pos_tags_embs, neg_img_embs, neg_tags_embs):
     pos = pos_img_embs + pos_tags_embs
+    faiss.normalize_L2(pos)
 
     neg = neg_img_embs + neg_tags_embs
+    faiss.normalize_L2(neg)
 
     result = pos - neg
+    faiss.normalize_L2(result)
     return result
 
 
@@ -48,12 +51,9 @@ def danbooru_id_to_url(image_id, selected_ratings, api_username="", api_key=""):
 
 class Predictor:
     def __init__(self):
+        self.loaded_variant = None
         self.base_model = "wd-v1-4-convnext-tagger-v2"
 
-        with open(f"data/{self.base_model}/clip.msgpack", "rb") as f:
-            data = f.read()
-
-        self.params = flax.serialization.msgpack_restore(data)["model"]
         self.model = CLIP()
 
         self.tags_df = pd.read_csv("data/selected_tags.csv")
@@ -64,12 +64,27 @@ class Predictor:
         config = json.loads(open("index/cosine_infos.json").read())["index_param"]
         faiss.ParameterSpace().set_index_parameters(self.knn_index, config)
 
+    def load_params(self, variant):
+        if self.loaded_variant == variant:
+            return
+
+        if variant == "CLIP":
+            with open(f"data/{self.base_model}/clip.msgpack", "rb") as f:
+                data = f.read()
+        elif variant == "SigLIP":
+            with open(f"data/{self.base_model}/siglip.msgpack", "rb") as f:
+                data = f.read()
+
+        self.params = flax.serialization.msgpack_restore(data)["model"]
+        self.loaded_variant = variant
+
     def predict(
         self,
         pos_img_input,
         neg_img_input,
         positive_tags,
         negative_tags,
+        selected_model,
         selected_ratings,
         n_neighbours,
         api_username,
@@ -78,6 +93,8 @@ class Predictor:
         tags_df = self.tags_df
         model = self.model
 
+        self.load_params(selected_model)
+
         num_classes = len(tags_df)
 
         output_shape = model.out_units
@@ -172,10 +189,10 @@ def main():
                 positive_tags = gr.Textbox(label="Positive tags")
                 negative_tags = gr.Textbox(label="Negative tags")
             with gr.Column():
-                selected_ratings = gr.CheckboxGroup(
-                    choices=["General", "Sensitive", "Questionable", "Explicit"],
-                    value=["General", "Sensitive"],
-                    label="Ratings",
+                selected_model = gr.Radio(
+                    choices=["CLIP", "SigLIP"],
+                    value="CLIP",
+                    label="Model",
                 )
                 n_neighbours = gr.Slider(
                     minimum=1,
@@ -185,8 +202,14 @@ def main():
                     label="# of images",
                 )
             with gr.Column():
-                api_username = gr.Textbox(label="Danbooru API Username")
-                api_key = gr.Textbox(label="Danbooru API Key")
+                selected_ratings = gr.CheckboxGroup(
+                    choices=["General", "Sensitive", "Questionable", "Explicit"],
+                    value=["General", "Sensitive"],
+                    label="Ratings",
+                )
+                with gr.Row():
+                    api_username = gr.Textbox(label="Danbooru API Username")
+                    api_key = gr.Textbox(label="Danbooru API Key")
 
         find_btn = gr.Button("Find similar images")
 
@@ -199,6 +222,7 @@ def main():
                     None,
                     "marcille_donato",
                     "",
+                    "CLIP",
                     ["General", "Sensitive"],
                     5,
                     "",
@@ -209,6 +233,7 @@ def main():
                     None,
                     "yellow_eyes,red_horns",
                     "",
+                    "CLIP",
                     ["General", "Sensitive"],
                     5,
                     "",
@@ -219,6 +244,7 @@ def main():
                     None,
                     "artoria_pendragon_(fate),solo",
                     "excalibur_(fate/stay_night),green_eyes,monochrome,blonde_hair",
+                    "CLIP",
                     ["General", "Sensitive"],
                     5,
                     "",
@@ -229,6 +255,7 @@ def main():
                     None,
                     "fujimaru_ritsuka_(female)",
                     "solo",
+                    "CLIP",
                     ["General", "Sensitive"],
                     5,
                     "",
@@ -239,6 +266,7 @@ def main():
                     "examples/46657164_p1.jpg",
                     "",
                     "",
+                    "CLIP",
                     ["General", "Sensitive"],
                     5,
                     "",
@@ -250,6 +278,7 @@ def main():
             neg_img_input,
             positive_tags,
             negative_tags,
+            selected_model,
             selected_ratings,
             n_neighbours,
             api_username,
@@ -268,6 +297,7 @@ def main():
             neg_img_input,
             positive_tags,
             negative_tags,
+            selected_model,
             selected_ratings,
             n_neighbours,
             api_username,
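The `faiss.normalize_L2` calls added in the first hunk keep each intermediate unit-length, which the cosine index (presumably what index/cosine_infos.json configures) expects: summing two unit vectors and re-normalizing averages their directions, and normalizing `result` makes the final query unit-norm again after the subtraction. A quick self-contained check of the in-place behavior (the call requires a 2-D float32 array; the values are illustrative):

import numpy as np
import faiss

v = np.array([[3.0, 4.0]], dtype=np.float32)
faiss.normalize_L2(v)  # normalizes each row in place
print(v)               # [[0.6 0.8]] -- now unit L2 norm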
data/wd-v1-4-convnext-tagger-v2/siglip.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b541d6ed39a4df5ca2edd7e3431e936bbb61c9499026ad3365361af13aa06d06
+size 48689369
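The three `+` lines above are a Git LFS pointer, not the checkpoint itself; the roughly 48 MB msgpack is fetched by LFS at checkout. Once present, it is restored the same way `load_params` in the app.py diff does for either variant, roughly:

import flax

# Mirrors load_params() in the app.py diff above.
with open("data/wd-v1-4-convnext-tagger-v2/siglip.msgpack", "rb") as f:
    params = flax.serialization.msgpack_restore(f.read())["model"]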