Scratch_Vision_Game_test_dup

Sleeping

App Files Community

prthm11 committed on Sep 13

Commit

1971458

verified ·

1 Parent(s): d60a89a

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -60

app.py CHANGED Viewed

@@ -3389,44 +3389,43 @@ SPRITE_DIR/"Zebra.sprite3"/"f3e322a25b9f79801066056de6f33fb1.png"
     folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
-    # ============================== #
-    #      EMBED SPRITE IMAGES       #
-    #      (using CLIP again)        #
-    # ============================== #
-    # Make sure all buffers are at start
-    for buf in sprite_images_bytes:
-        try:
-            buf.seek(0)
-        except Exception:
-            pass
-    # Try the fast path: embed whole list at once (many CLIP wrappers accept a list of BytesIO/PIL)
-    try:
-        sprite_matrix = clip_embd.embed_image(sprite_images_bytes, batch_size=8)
-        sprite_matrix = np.array(sprite_matrix, dtype=np.float32)
-    except Exception:
-        sprite_feats = []
-        for buf in sprite_images_bytes:
-            buf.seek(0)
-            try:
-                feats = clip_embd.embed_image([buf])[0]
-            except Exception:
-                buf.seek(0)
-                pil_img = Image.open(buf).convert("RGB")
-                try:
-                    feats = clip_embd.embed_image([pil_img])[0]
-                except Exception:
-                    pil_arr = np.array(pil_img)
-                    feats = clip_embd.embed_image([pil_arr])[0]
-            sprite_feats.append(np.asarray(feats, dtype=np.float32))
-        sprite_matrix = np.vstack(sprite_feats)  # shape (N, D)
     # --- load reference embeddings (unchanged) ---
-    with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
-        embedding_json = json.load(f)
-    img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
     # =========================================
@@ -3443,21 +3442,21 @@ SPRITE_DIR/"Zebra.sprite3"/"f3e322a25b9f79801066056de6f33fb1.png"
     # # ============================== #
     # #      EMBED SPRITE IMAGES       #
     # # ============================== #
-    # sprite_features = []
-    # for b64 in sprite_base64:
-    #     if "," in b64:  # strip data URI prefix if present
-    #         b64 = b64.split(",", 1)[1]
-    #     img_bytes = base64.b64decode(b64)
-    #     pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
-    #     # optional re-encode to PNG for CLIP
-    #     buf = BytesIO()
-    #     pil_img.save(buf, format="PNG")
-    #     buf.seek(0)
-    #     feats = clip_embd.embed_image([buf])[0]  # extract CLIP embedding
-    #     sprite_features.append(feats)
     # sprite_matrix = np.array(sprite_features, dtype=np.float32)
     # # ============================== #
@@ -3474,23 +3473,37 @@ SPRITE_DIR/"Zebra.sprite3"/"f3e322a25b9f79801066056de6f33fb1.png"
     # normalize both sides (important — stored embeddings may not be normalized)
-    def l2_normalize_rows(x: np.ndarray, eps: float = 1e-10) -> np.ndarray:
-        """
-        L2-normalize each row of a 2D numpy array.
-        Args:
-            x: Array of shape (N, D).
-            eps: Small constant to avoid division by zero.
-        Returns:
-            Normalized array of shape (N, D) where each row has unit norm.
-        """
-        norms = np.linalg.norm(x, axis=1, keepdims=True)
-        return x / np.maximum(norms, eps)
-    sprite_matrix = l2_normalize_rows(sprite_matrix)
-    img_matrix = l2_normalize_rows(img_matrix)
     # =========================================
     #  Compute similarities & pick best match
     # =========================================

     folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
+    # # ============================== #
+    # #      EMBED SPRITE IMAGES       #
+    # #      (using CLIP again)        #
+    # # ============================== #
+    # # Make sure all buffers are at start
+    # for buf in sprite_images_bytes:
+    #     try:
+    #         buf.seek(0)
+    #     except Exception:
+    #         pass
+    # # Try the fast path: embed whole list at once (many CLIP wrappers accept a list of BytesIO/PIL)
+    # try:
+    #     sprite_matrix = clip_embd.embed_image(sprite_images_bytes, batch_size=8)
+    #     sprite_matrix = np.array(sprite_matrix, dtype=np.float32)
+    # except Exception:
+    #     sprite_feats = []
+    #     for buf in sprite_images_bytes:
+    #         buf.seek(0)
+    #         try:
+    #             feats = clip_embd.embed_image([buf])[0]
+    #         except Exception:
+    #             buf.seek(0)
+    #             pil_img = Image.open(buf).convert("RGB")
+    #             try:
+    #                 feats = clip_embd.embed_image([pil_img])[0]
+    #             except Exception:
+    #                 pil_arr = np.array(pil_img)
+    #                 feats = clip_embd.embed_image([pil_arr])[0]
+    #         sprite_feats.append(np.asarray(feats, dtype=np.float32))
+    #     sprite_matrix = np.vstack(sprite_feats)  # shape (N, D)
     # --- load reference embeddings (unchanged) ---
+    # with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
+    #     embedding_json = json.load(f)
+    # img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
     # =========================================
     # # ============================== #
     # #      EMBED SPRITE IMAGES       #
     # # ============================== #
+    sprite_features = []
+    for b64 in sprite_base64:
+        if "," in b64:  # strip data URI prefix if present
+            b64 = b64.split(",", 1)[1]
+        img_bytes = base64.b64decode(b64)
+        pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
+        # optional re-encode to PNG for CLIP
+        buf = BytesIO()
+        pil_img.save(buf, format="PNG")
+        buf.seek(0)
+        feats = clip_embd.embed_image([buf])[0]  # extract CLIP embedding
+        sprite_features.append(feats)
     # sprite_matrix = np.array(sprite_features, dtype=np.float32)
     # # ============================== #
     # normalize both sides (important — stored embeddings may not be normalized)
+    # def l2_normalize_rows(x: np.ndarray, eps: float = 1e-10) -> np.ndarray:
+    #     """
+    #     L2-normalize each row of a 2D numpy array.
+    #     Args:
+    #         x: Array of shape (N, D).
+    #         eps: Small constant to avoid division by zero.
+    #     Returns:
+    #         Normalized array of shape (N, D) where each row has unit norm.
+    #     """
+    #     norms = np.linalg.norm(x, axis=1, keepdims=True)
+    #     return x / np.maximum(norms, eps)
+    # sprite_matrix = l2_normalize_rows(sprite_matrix)
+    # img_matrix = l2_normalize_rows(img_matrix)
+    sprite_features = clip_embd.embed_image(sprite_image_paths)
+    # ============================== #
+    #     COMPUTE SIMILARITIES       #
+    # ============================== #
+    with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
+        embedding_json = json.load(f)
+        # print(f"\n\n EMBEDDING JSON: {embedding_json}")
+    img_matrix = np.array([img["embeddings"] for img in embedding_json])
+    sprite_matrix = np.array(sprite_features)
+    similarity = np.matmul(sprite_matrix, img_matrix.T)
+    most_similar_indices = np.argmax(similarity, axis=1)
     # =========================================
     #  Compute similarities & pick best match
     # =========================================