Spaces: google/zero-shot-sg1-sg2

Running

App Files Files Community

ariG23498 HF Staff committed on Feb 19

Commit

0553ee9

1 Parent(s): ba8b411

adding examples

Browse files

Files changed (3) hide show

app.py +12 -9
cat.jpg +0 -0
requirements.txt +1 -2

app.py CHANGED Viewed

@@ -1,9 +1,10 @@
 """This space is taken and modified from https://huggingface.co/spaces/merve/compare_clip_siglip"""
 import torch
-from transformers import AutoModel, AutoProcessor
-import numpy as np
 import gradio as gr
-import spaces
 ################################################################################
 # Load the models
@@ -12,7 +13,7 @@ sg1_ckpt = "google/siglip-so400m-patch14-384"
 siglip1_model = AutoModel.from_pretrained(sg1_ckpt, device_map="auto").eval()
 siglip1_processor = AutoProcessor.from_pretrained(sg1_ckpt)
-sg2_ckpt = "s0225/siglip2-so400m-patch14-384"
 siglip2_model = AutoModel.from_pretrained(sg2_ckpt, device_map="auto").eval()
 siglip2_processor = AutoProcessor.from_pretrained(sg2_ckpt)
@@ -24,11 +25,10 @@ def postprocess(output):
 def postprocess_siglip(sg1_probs, sg2_probs, labels):
-    sg1_output = {labels[i]: float(np.array(sg1_probs[0])[i]) for i in range(len(labels))}
-    sg2_output = {labels[i]: float(np.array(sg2_probs[0])[i]) for i in range(len(labels))}
     return sg1_output, sg2_output
-@spaces.GPU
 def siglip_detector(image, texts):
     sg1_inputs = siglip1_processor(
         text=texts, images=image, return_tensors="pt", padding="max_length", max_length=64
@@ -73,13 +73,16 @@ with gr.Blocks() as demo:
             siglip1_output = gr.Label(label="SigLIP 1 Output", num_top_classes=3)
             siglip2_output = gr.Label(label="SigLIP 2 Output", num_top_classes=3)
-    examples = [["./baklava.jpg", "baklava, souffle, tiramisu"]]
     gr.Examples(
         examples=examples,
         inputs=[image_input, text_input],
         outputs=[siglip1_output, siglip2_output],
         fn=infer,
-        cache_examples=True,
     )
     run_button.click(
         fn=infer, inputs=[image_input, text_input], outputs=[siglip1_output, siglip2_output]

 """This space is taken and modified from https://huggingface.co/spaces/merve/compare_clip_siglip"""
 import torch
+from transformers import (
+    AutoModel,
+    AutoProcessor
+)
 import gradio as gr
 ################################################################################
 # Load the models
 siglip1_model = AutoModel.from_pretrained(sg1_ckpt, device_map="auto").eval()
 siglip1_processor = AutoProcessor.from_pretrained(sg1_ckpt)
+sg2_ckpt = "google/siglip2-so400m-patch14-384"
 siglip2_model = AutoModel.from_pretrained(sg2_ckpt, device_map="auto").eval()
 siglip2_processor = AutoProcessor.from_pretrained(sg2_ckpt)
 def postprocess_siglip(sg1_probs, sg2_probs, labels):
+    sg1_output = {labels[i]: float(sg1_probs[0].cpu().numpy()[i]) for i in range(len(labels))}
+    sg2_output = {labels[i]: float(sg2_probs[0].cpu().numpy()[i]) for i in range(len(labels))}
     return sg1_output, sg2_output
 def siglip_detector(image, texts):
     sg1_inputs = siglip1_processor(
         text=texts, images=image, return_tensors="pt", padding="max_length", max_length=64
             siglip1_output = gr.Label(label="SigLIP 1 Output", num_top_classes=3)
             siglip2_output = gr.Label(label="SigLIP 2 Output", num_top_classes=3)
+    examples = [
+        ["./baklava.jpg", "desser on a plate, a serving of baklava, a plate and spoon"],
+        ["./baklava.jpg", "a cat, two cats, three cats"],
+        ["./baklava.jpg", "two sleeping cats, two cats playing, three cats laying down"],
+        ]
     gr.Examples(
         examples=examples,
         inputs=[image_input, text_input],
         outputs=[siglip1_output, siglip2_output],
         fn=infer,
     )
     run_button.click(
         fn=infer, inputs=[image_input, text_input], outputs=[siglip1_output, siglip2_output]

cat.jpg ADDED Viewed

requirements.txt CHANGED Viewed

@@ -4,5 +4,4 @@ git+https://github.com/huggingface/transformers@main
 sentencepiece
 pillow
 protobuf
-accelerate
-spaces

 sentencepiece
 pillow
 protobuf
+accelerate