Spaces: Build error

Sujit Pal committed · Commit f58917e
1 Parent(s): 5de821f

fix: changing output format to include caption

Files changed:
- dashboard_image2image.py (+14 -13)
- dashboard_text2image.py (+14 -16)
- utils.py (+15 -0)
dashboard_image2image.py CHANGED

@@ -12,11 +12,9 @@ import utils
 
 BASELINE_MODEL = "openai/clip-vit-base-patch32"
 MODEL_PATH = "flax-community/clip-rsicd-v2"
-
 IMAGE_VECTOR_FILE = "./vectors/test-bs128x8-lr5e-6-adam-ckpt-1.tsv"
-
 IMAGES_DIR = "./images"
-
+CAPTIONS_FILE = os.path.join(IMAGES_DIR, "dataset_rsicd.json")
 
 @st.cache(allow_output_mutation=True)
 def load_example_images():

@@ -62,6 +60,7 @@ def download_and_prepare_image(image_url):
 def app():
     filenames, index = utils.load_index(IMAGE_VECTOR_FILE)
     model, processor = utils.load_model(MODEL_PATH, BASELINE_MODEL)
+    image2caption = utils.load_captions(CAPTIONS_FILE)
 
     example_image_list = load_example_images()
 

@@ -150,17 +149,19 @@
         query_vec = np.asarray(query_vec)
         ids, distances = index.knnQuery(query_vec, k=11)
         result_filenames = [filenames[id] for id in ids]
-
+        rank = 0
         for result_filename, score in zip(result_filenames, distances):
             if image_name is not None and result_filename == image_name:
                 continue
-
-
-
-
-
-
-
-
-
+            caption = "{:s} (score: {:.3f})".format(result_filename, 1.0 - score)
+            col1, col2, col3 = st.beta_columns([2, 10, 10])
+            col1.markdown("{:d}.".format(rank + 1))
+            col2.image(Image.open(os.path.join(IMAGES_DIR, result_filename)),
+                       caption=caption)
+            caption_text = []
+            for caption in image2caption[result_filename]:
+                caption_text.append("* {:s}\n".format(caption))
+            col3.markdown("".join(caption_text))
+            rank += 1
+            st.markdown("---")
         suggest_idx = -1
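The new display loop asks the index for k=11 neighbors because the query image itself can come back as its own nearest neighbor; the `continue` skips it, and the manual `rank` counter keeps the displayed ranks contiguous after the skip. The `1.0 - score` conversion assumes the nmslib index behind `utils.load_index` was built in the "cosinesimil" space, where `knnQuery` returns distance = 1 - cosine similarity. A minimal sketch of that relationship (illustrative names and parameters, not repo code):

import nmslib
import numpy as np

# Stand-in for the CLIP image embeddings loaded from the TSV file.
vectors = np.random.random((100, 512)).astype(np.float32)

# In the "cosinesimil" space, distances are 1 - cosine similarity
# (assumed to match how utils.load_index builds its index).
index = nmslib.init(method="hnsw", space="cosinesimil")
index.addDataPointBatch(vectors)
index.createIndex({"M": 30, "efConstruction": 100}, print_progress=False)

ids, distances = index.knnQuery(vectors[0], k=11)
similarities = 1.0 - distances  # what the dashboard reports as "score"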
dashboard_text2image.py CHANGED

@@ -4,25 +4,21 @@ import numpy as np
 import os
 import streamlit as st
 
+from PIL import Image
 from transformers import CLIPProcessor, FlaxCLIPModel
 
 import utils
 
 BASELINE_MODEL = "openai/clip-vit-base-patch32"
-# MODEL_PATH = "/home/shared/models/clip-rsicd/bs128x8-lr5e-6-adam/ckpt-1"
 MODEL_PATH = "flax-community/clip-rsicd-v2"
-
-# IMAGE_VECTOR_FILE = "/home/shared/data/vectors/test-baseline.tsv"
-# IMAGE_VECTOR_FILE = "/home/shared/data/vectors/test-bs128x8-lr5e-6-adam-ckpt-1.tsv"
 IMAGE_VECTOR_FILE = "./vectors/test-bs128x8-lr5e-6-adam-ckpt-1.tsv"
-
-# IMAGES_DIR = "/home/shared/data/rsicd_images"
 IMAGES_DIR = "./images"
-
+CAPTIONS_FILE = os.path.join(IMAGES_DIR, "dataset_rsicd.json")
 
 def app():
     filenames, index = utils.load_index(IMAGE_VECTOR_FILE)
     model, processor = utils.load_model(MODEL_PATH, BASELINE_MODEL)
+    image2caption = utils.load_captions(CAPTIONS_FILE)
 
     st.title("Retrieve Images given Text")
     st.markdown("""

@@ -78,13 +74,15 @@ def app():
         query_vec = np.asarray(query_vec)
         ids, distances = index.knnQuery(query_vec, k=10)
         result_filenames = [filenames[id] for id in ids]
-
-
-
-
-
-
-
-
-
+        for rank, (result_filename, score) in enumerate(zip(result_filenames, distances)):
+            caption = "{:s} (score: {:.3f})".format(result_filename, 1.0 - score)
+            col1, col2, col3 = st.beta_columns([2, 10, 10])
+            col1.markdown("{:d}.".format(rank + 1))
+            col2.image(Image.open(os.path.join(IMAGES_DIR, result_filename)),
+                       caption=caption)
+            caption_text = []
+            for caption in image2caption[result_filename]:
+                caption_text.append("* {:s}\n".format(caption))
+            col3.markdown("".join(caption_text))
+            st.markdown("---")
         suggest_idx = -1
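Unlike the image-to-image dashboard, nothing is skipped here, so the loop takes its rank directly from enumerate instead of a manual counter. Note that st.beta_columns is the pre-1.0 Streamlit API; newer releases renamed it to st.columns, so on a current install the same result row would look like the sketch below (filename, score, and caption invented for illustration):

import streamlit as st

# Three columns with relative widths 2:10:10 -- rank, image, reference captions.
col1, col2, col3 = st.columns([2, 10, 10])
col1.markdown("1.")
col2.image("./images/airport_1.jpg", caption="airport_1.jpg (score: 0.734)")
col3.markdown("* an illustrative reference caption for the retrieved image\n")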
utils.py CHANGED

@@ -1,3 +1,4 @@
+import json
 import matplotlib.pyplot as plt
 import nmslib
 import numpy as np

@@ -31,3 +32,17 @@ def load_model(model_path, baseline_model):
     # processor = CLIPProcessor.from_pretrained(baseline_model)
     processor = CLIPProcessor.from_pretrained(model_path)
     return model, processor
+
+
+@st.cache(allow_output_mutation=True)
+def load_captions(caption_file):
+    image2caption = {}
+    with open(caption_file, "r") as fcap:
+        data = json.loads(fcap.read())
+    for image in data["images"]:
+        filename = image["filename"]
+        captions = []
+        for sentence in image["sentences"]:
+            captions.append(sentence["raw"])
+        image2caption[filename] = captions
+    return image2caption
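load_captions reads the Karpathy-style caption format that dataset_rsicd.json uses: a top-level "images" list whose entries each carry a "filename" and a list of "sentences" holding the raw caption text. A minimal illustration of the shape it consumes and the mapping it returns (caption text invented for the example):

# Expected input shape:
data = {
    "images": [
        {
            "filename": "airport_1.jpg",
            "sentences": [
                {"raw": "many planes are parked at the airport ."},
                {"raw": "several planes sit near a terminal building ."},
            ],
        },
    ]
}

# load_captions flattens this into:
# {"airport_1.jpg": ["many planes are parked at the airport .",
#                    "several planes sit near a terminal building ."]}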