Update app.py
Browse files
app.py
CHANGED
|
@@ -2363,11 +2363,40 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2363 |
logger.info("🔍 Running similarity matching…")
|
| 2364 |
os.makedirs(project_folder, exist_ok=True)
|
| 2365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2366 |
# ----------------------------------------
|
| 2367 |
-
#
|
| 2368 |
backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 2369 |
sprite_base_path = os.path.normpath(str(SPRITE_DIR))
|
| 2370 |
-
code_blocks_path
|
| 2371 |
# ----------------------------------------
|
| 2372 |
|
| 2373 |
project_json_path = os.path.join(project_folder, "project.json")
|
|
@@ -2375,23 +2404,26 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2375 |
# ==============================
|
| 2376 |
# READ SPRITE METADATA
|
| 2377 |
# ==============================
|
| 2378 |
-
# with open(input_json_path, 'r') as f:
|
| 2379 |
-
# sprites_data = json.load(f)
|
| 2380 |
-
|
| 2381 |
sprite_ids, sprite_base64 = [], []
|
| 2382 |
for sid, sprite in sprites_data.items():
|
| 2383 |
sprite_ids.append(sid)
|
| 2384 |
-
# texts.append("This is " + sprite.get("description", sprite.get("name", "")))
|
| 2385 |
sprite_base64.append(sprite["base64"])
|
| 2386 |
|
|
|
|
| 2387 |
sprite_images_bytes = []
|
| 2388 |
for b64 in sprite_base64:
|
| 2389 |
-
|
| 2390 |
-
|
| 2391 |
-
|
| 2392 |
-
|
| 2393 |
-
|
| 2394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2395 |
# =========================================
|
| 2396 |
# Build the list of all candidate images
|
| 2397 |
# =========================================
|
|
@@ -3458,21 +3490,21 @@ SPRITE_DIR/"Zebra.sprite3"/"f3e322a25b9f79801066056de6f33fb1.png"
|
|
| 3458 |
# feats = clip_embd.embed_image([buf])[0] # extract CLIP embedding
|
| 3459 |
# sprite_features.append(feats)
|
| 3460 |
|
| 3461 |
-
sprite_features = []
|
| 3462 |
-
for sprite in sprites_data.values(): # assuming dict like {"Sprite 1": {...}, ...}
|
| 3463 |
-
|
| 3464 |
-
|
| 3465 |
-
|
| 3466 |
|
| 3467 |
-
|
| 3468 |
-
|
| 3469 |
|
| 3470 |
-
|
| 3471 |
-
|
| 3472 |
-
|
| 3473 |
|
| 3474 |
-
|
| 3475 |
-
|
| 3476 |
|
| 3477 |
# sprite_matrix = np.array(sprite_features, dtype=np.float32)
|
| 3478 |
# # ============================== #
|
|
@@ -3507,29 +3539,97 @@ SPRITE_DIR/"Zebra.sprite3"/"f3e322a25b9f79801066056de6f33fb1.png"
|
|
| 3507 |
# img_matrix = l2_normalize_rows(img_matrix)
|
| 3508 |
# sprite_features = clip_embd.embed_image(sprite_base_path)
|
| 3509 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3510 |
# ============================== #
|
| 3511 |
# COMPUTE SIMILARITIES #
|
| 3512 |
# ============================== #
|
| 3513 |
with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
|
| 3514 |
embedding_json = json.load(f)
|
| 3515 |
-
# print(f"\n\n EMBEDDING JSON: {embedding_json}")
|
| 3516 |
-
|
| 3517 |
-
img_matrix = np.array([img["embeddings"] for img in embedding_json])
|
| 3518 |
-
sprite_matrix = np.array(sprite_features)
|
| 3519 |
-
|
| 3520 |
-
similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 3521 |
-
most_similar_indices = np.argmax(similarity, axis=1)
|
| 3522 |
|
| 3523 |
-
|
| 3524 |
-
|
| 3525 |
-
|
| 3526 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3527 |
most_similar_indices = np.argmax(similarity, axis=1)
|
| 3528 |
|
| 3529 |
# =========================================
|
| 3530 |
# Copy matched sprite assets + collect data
|
| 3531 |
# =========================================
|
| 3532 |
-
project_data
|
| 3533 |
copied_folders = set()
|
| 3534 |
|
| 3535 |
for sprite_idx, matched_idx in enumerate(most_similar_indices):
|
|
|
|
| 2363 |
logger.info("🔍 Running similarity matching…")
|
| 2364 |
os.makedirs(project_folder, exist_ok=True)
|
| 2365 |
|
| 2366 |
+
# # ----------------------------------------
|
| 2367 |
+
# # CHANGED: define normalized base-paths so startswith() checks work
|
| 2368 |
+
# backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 2369 |
+
# sprite_base_path = os.path.normpath(str(SPRITE_DIR))
|
| 2370 |
+
# code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
|
| 2371 |
+
# # ----------------------------------------
|
| 2372 |
+
|
| 2373 |
+
# project_json_path = os.path.join(project_folder, "project.json")
|
| 2374 |
+
|
| 2375 |
+
# # ==============================
|
| 2376 |
+
# # READ SPRITE METADATA
|
| 2377 |
+
# # ==============================
|
| 2378 |
+
# # with open(input_json_path, 'r') as f:
|
| 2379 |
+
# # sprites_data = json.load(f)
|
| 2380 |
+
|
| 2381 |
+
# sprite_ids, sprite_base64 = [], []
|
| 2382 |
+
# for sid, sprite in sprites_data.items():
|
| 2383 |
+
# sprite_ids.append(sid)
|
| 2384 |
+
# # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
|
| 2385 |
+
# sprite_base64.append(sprite["base64"])
|
| 2386 |
+
|
| 2387 |
+
# sprite_images_bytes = []
|
| 2388 |
+
# for b64 in sprite_base64:
|
| 2389 |
+
# img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
|
| 2390 |
+
# buffer = BytesIO()
|
| 2391 |
+
# img.save(buffer, format="PNG")
|
| 2392 |
+
# buffer.seek(0)
|
| 2393 |
+
# sprite_images_bytes.append(buffer)
|
| 2394 |
+
|
| 2395 |
# ----------------------------------------
|
| 2396 |
+
# normalized base-paths
|
| 2397 |
backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 2398 |
sprite_base_path = os.path.normpath(str(SPRITE_DIR))
|
| 2399 |
+
code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
|
| 2400 |
# ----------------------------------------
|
| 2401 |
|
| 2402 |
project_json_path = os.path.join(project_folder, "project.json")
|
|
|
|
| 2404 |
# ==============================
|
| 2405 |
# READ SPRITE METADATA
|
| 2406 |
# ==============================
|
|
|
|
|
|
|
|
|
|
| 2407 |
sprite_ids, sprite_base64 = [], []
|
| 2408 |
for sid, sprite in sprites_data.items():
|
| 2409 |
sprite_ids.append(sid)
|
|
|
|
| 2410 |
sprite_base64.append(sprite["base64"])
|
| 2411 |
|
| 2412 |
+
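# === Decode each base64 sprite (optional data-URI prefix stripped) into an in-memory PNG buffer ===
# NOTE(review): sprites that fail to decode are skipped, so sprite_images_bytes may be shorter than sprite_ids — confirm that later index-based lookups stay aligned.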
|
| 2413 |
sprite_images_bytes = []
|
| 2414 |
for b64 in sprite_base64:
|
| 2415 |
+
try:
|
| 2416 |
+
raw = b64.split(",", 1)[-1]
|
| 2417 |
+
img = Image.open(BytesIO(base64.b64decode(raw))).convert("RGB")
|
| 2418 |
+
buf = BytesIO()
|
| 2419 |
+
img.save(buf, format="PNG")
|
| 2420 |
+
buf.seek(0)
|
| 2421 |
+
sprite_images_bytes.append(buf)
|
| 2422 |
+
except Exception as e:
|
| 2423 |
+
logger.warning(f"Skipping bad sprite base64: {e}")
|
| 2424 |
+
|
| 2425 |
+
|
| 2426 |
+
|
| 2427 |
# =========================================
|
| 2428 |
# Build the list of all candidate images
|
| 2429 |
# =========================================
|
|
|
|
| 3490 |
# feats = clip_embd.embed_image([buf])[0] # extract CLIP embedding
|
| 3491 |
# sprite_features.append(feats)
|
| 3492 |
|
| 3493 |
+
# sprite_features = []
|
| 3494 |
+
# for sprite in sprites_data.values(): # assuming dict like {"Sprite 1": {...}, ...}
|
| 3495 |
+
# b64 = sprite["base64"]
|
| 3496 |
+
# if "," in b64:
|
| 3497 |
+
# b64 = b64.split(",", 1)[1]
|
| 3498 |
|
| 3499 |
+
# img_bytes = base64.b64decode(b64)
|
| 3500 |
+
# pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
|
| 3501 |
|
| 3502 |
+
# buf = BytesIO()
|
| 3503 |
+
# pil_img.save(buf, format="PNG")
|
| 3504 |
+
# buf.seek(0)
|
| 3505 |
|
| 3506 |
+
# feats = clip_embd.embed_image([buf])[0]
|
| 3507 |
+
# sprite_features.append(feats)
|
| 3508 |
|
| 3509 |
# sprite_matrix = np.array(sprite_features, dtype=np.float32)
|
| 3510 |
# # ============================== #
|
|
|
|
| 3539 |
# img_matrix = l2_normalize_rows(img_matrix)
|
| 3540 |
# sprite_features = clip_embd.embed_image(sprite_base_path)
|
| 3541 |
|
| 3542 |
+
# # ============================== #
|
| 3543 |
+
# # COMPUTE SIMILARITIES V1 #
|
| 3544 |
+
# # ============================== #
|
| 3545 |
+
# with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
|
| 3546 |
+
# embedding_json = json.load(f)
|
| 3547 |
+
# # print(f"\n\n EMBEDDING JSON: {embedding_json}")
|
| 3548 |
+
|
| 3549 |
+
# img_matrix = np.array([img["embeddings"] for img in embedding_json])
|
| 3550 |
+
# sprite_matrix = np.array(sprite_features)
|
| 3551 |
+
|
| 3552 |
+
# similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 3553 |
+
# most_similar_indices = np.argmax(similarity, axis=1)
|
| 3554 |
+
|
| 3555 |
+
# # =========================================
|
| 3556 |
+
# # Compute similarities & pick best match
|
| 3557 |
+
# # =========================================
|
| 3558 |
+
# similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 3559 |
+
# most_similar_indices = np.argmax(similarity, axis=1)
|
| 3560 |
+
|
| 3561 |
+
# # =========================================
|
| 3562 |
+
# # Copy matched sprite assets + collect data
|
| 3563 |
+
# # =========================================
|
| 3564 |
+
# project_data = []
|
| 3565 |
+
# copied_folders = set()
|
| 3566 |
+
|
| 3567 |
+
|
| 3568 |
+
# ==============================
|
| 3569 |
+
# EMBED SPRITES (write to disk then embed)
|
| 3570 |
+
# ==============================
|
| 3571 |
+
# Ensure sprite_base_path exists and is empty (or a temp dir)
|
| 3572 |
+
os.makedirs(sprite_base_path, exist_ok=True)
|
| 3573 |
+
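# WARNING: sprite_base_path is the normalized SPRITE_DIR, so this loop deletes every top-level file in that directory; entries that os.remove cannot delete (e.g. subfolders) are skipped silently. NOTE(review): consider writing to a dedicated temporary directory instead.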
|
| 3574 |
+
for f in os.listdir(sprite_base_path):
|
| 3575 |
+
try:
|
| 3576 |
+
os.remove(os.path.join(sprite_base_path, f))
|
| 3577 |
+
except Exception:
|
| 3578 |
+
pass
|
| 3579 |
+
|
| 3580 |
+
# Save the decoded BytesIO images as files into sprite_base_path
|
| 3581 |
+
sprite_image_paths = []
|
| 3582 |
+
for i, buf in enumerate(sprite_images_bytes):
|
| 3583 |
+
img_path = os.path.join(sprite_base_path, f"sprite_{i}.png")
|
| 3584 |
+
with open(img_path, "wb") as wf:
|
| 3585 |
+
wf.write(buf.getvalue())
|
| 3586 |
+
sprite_image_paths.append(os.path.normpath(img_path))
|
| 3587 |
+
|
| 3588 |
+
# sanity: if no sprites decoded, bail
|
| 3589 |
+
if not sprite_image_paths:
|
| 3590 |
+
raise RuntimeError("No valid sprite images to embed (sprite_image_paths empty).")
|
| 3591 |
+
|
| 3592 |
+
# Embedder: clip_embd is expected to be initialized elsewhere in this file before this point (not shown here — TODO confirm).
|
| 3593 |
+
# clip_embd = OpenCLIPEmbeddings() # if not already global
|
| 3594 |
+
|
| 3595 |
+
# Embed sprites (pass list of file paths)
|
| 3596 |
+
try:
|
| 3597 |
+
sprite_features = clip_embd.embed_image(sprite_image_paths)
|
| 3598 |
+
except Exception as e:
|
| 3599 |
+
logger.error(f"clip_embd.embed_image failed for sprite_image_paths: {e}")
|
| 3600 |
+
raise
|
| 3601 |
+
|
| 3602 |
# ============================== #
|
| 3603 |
# COMPUTE SIMILARITIES #
|
| 3604 |
# ============================== #
|
| 3605 |
with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
|
| 3606 |
embedding_json = json.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3607 |
|
| 3608 |
+
img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
|
| 3609 |
+
sprite_matrix = np.array(sprite_features, dtype=np.float32)
|
| 3610 |
+
|
| 3611 |
+
# sanity checks
|
| 3612 |
+
if sprite_matrix.size == 0:
|
| 3613 |
+
raise RuntimeError("No sprite embeddings (sprite_matrix empty).")
|
| 3614 |
+
if img_matrix.size == 0:
|
| 3615 |
+
raise RuntimeError("No reference embeddings (img_matrix empty).")
|
| 3616 |
+
if sprite_matrix.shape[1] != img_matrix.shape[1]:
|
| 3617 |
+
raise RuntimeError(
|
| 3618 |
+
f"Embedding dimensionality mismatch: sprite {sprite_matrix.shape} vs img {img_matrix.shape}"
|
| 3619 |
+
)
|
| 3620 |
+
|
| 3621 |
+
try:
|
| 3622 |
+
similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 3623 |
+
except Exception as e:
|
| 3624 |
+
logger.error(f"Failed to compute similarity matrix: {e}")
|
| 3625 |
+
raise
|
| 3626 |
+
|
| 3627 |
most_similar_indices = np.argmax(similarity, axis=1)
|
| 3628 |
|
| 3629 |
# =========================================
|
| 3630 |
# Copy matched sprite assets + collect data
|
| 3631 |
# =========================================
|
| 3632 |
+
project_data = []
|
| 3633 |
copied_folders = set()
|
| 3634 |
|
| 3635 |
for sprite_idx, matched_idx in enumerate(most_similar_indices):
|