Update app.py
Browse files
app.py
CHANGED
|
@@ -32,14 +32,14 @@ from transformers import AutoImageProcessor, AutoModel
|
|
| 32 |
import faiss
|
| 33 |
from sentence_transformers import SentenceTransformer
|
| 34 |
|
| 35 |
-
#
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
|
| 42 |
-
|
| 43 |
|
| 44 |
# os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
|
| 45 |
# class ChatOpenRouter(ChatOpenAI):
|
|
@@ -388,7 +388,69 @@ def load_model_and_index():
|
|
| 388 |
# def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
|
| 389 |
# norm = np.linalg.norm(a, axis=1, keepdims=True)
|
| 390 |
# return a / (norm + eps)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
|
|
|
|
| 392 |
# Helper function to load the block catalog from a JSON file
|
| 393 |
def _load_block_catalog(block_type: str) -> Dict:
|
| 394 |
"""
|
|
@@ -2305,12 +2367,10 @@ def processed_page_node(state: GameState):
|
|
| 2305 |
state["processing"]= False
|
| 2306 |
return state
|
| 2307 |
|
| 2308 |
-
# Prepare manipulated sprite JSON structure
|
| 2309 |
-
manipulated_json = {}
|
| 2310 |
-
img_elements = []
|
| 2311 |
-
# { changes: "pdf_stream" in place of "pdf_path"
|
| 2312 |
def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
| 2313 |
''' Extract images from PDF and generate structured sprite JSON '''
|
|
|
|
|
|
|
| 2314 |
try:
|
| 2315 |
# {
|
| 2316 |
# pdf_path = Path(pdf_path)
|
|
@@ -2318,13 +2378,12 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 2318 |
# pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
|
| 2319 |
# print("-------------------------------pdf_filename-------------------------------",pdf_filename)
|
| 2320 |
# print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
|
| 2321 |
-
|
| 2322 |
if isinstance(pdf_stream, io.BytesIO):
|
| 2323 |
# use a random ID since there's no filename
|
| 2324 |
pdf_id = uuid.uuid4().hex
|
| 2325 |
else:
|
| 2326 |
pdf_id = os.path.splitext(os.path.basename(pdf_stream))[0]
|
| 2327 |
-
|
| 2328 |
# extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
|
| 2329 |
# json_subdir = JSON_DIR / pdf_filename
|
| 2330 |
# extracted_image_subdir.mkdir(parents=True, exist_ok=True)
|
|
@@ -2339,7 +2398,6 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 2339 |
# print("-------------------------------final_json_path-------------------------------",final_json_path)
|
| 2340 |
# print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
|
| 2341 |
|
| 2342 |
-
# }
|
| 2343 |
try:
|
| 2344 |
elements = partition_pdf(
|
| 2345 |
# filename=str(pdf_path), # partition_pdf might expect a string
|
|
@@ -2348,14 +2406,18 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 2348 |
extract_image_block_types=["Image"],
|
| 2349 |
hi_res_model_name="yolox",
|
| 2350 |
extract_image_block_to_payload=True,
|
|
|
|
|
|
|
|
|
|
| 2351 |
)
|
| 2352 |
print(f"ELEMENTS")
|
| 2353 |
except Exception as e:
|
| 2354 |
raise RuntimeError(
|
| 2355 |
f"❌ Failed to extract images from PDF: {str(e)}")
|
| 2356 |
-
|
| 2357 |
file_elements = [element.to_dict() for element in elements]
|
| 2358 |
-
|
|
|
|
| 2359 |
#{
|
| 2360 |
# try:
|
| 2361 |
# with open(output_json_path, "w") as f:
|
|
@@ -2363,21 +2425,21 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 2363 |
# for element in elements], f, indent=4)
|
| 2364 |
# except Exception as e:
|
| 2365 |
# raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
|
| 2366 |
-
|
| 2367 |
# try:
|
| 2368 |
# # Display extracted images
|
| 2369 |
# with open(output_json_path, 'r') as file:
|
| 2370 |
# file_elements = json.load(file)
|
| 2371 |
# except Exception as e:
|
| 2372 |
-
# raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
|
| 2373 |
# }
|
| 2374 |
-
|
| 2375 |
sprite_count = 1
|
| 2376 |
for el in file_elements:
|
| 2377 |
img_b64 = el["metadata"].get("image_base64")
|
| 2378 |
if not img_b64:
|
| 2379 |
-
continue
|
| 2380 |
-
|
| 2381 |
manipulated_json[f"Sprite {sprite_count}"] = {
|
| 2382 |
# "id":auto_id,
|
| 2383 |
# "name": name,
|
|
@@ -2391,6 +2453,18 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 2391 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
| 2392 |
|
| 2393 |
# def similarity_matching(input_json_path: str, project_folder: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2394 |
def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
| 2395 |
logger.info("🔍 Running similarity matching…")
|
| 2396 |
os.makedirs(project_folder, exist_ok=True)
|
|
@@ -2407,9 +2481,13 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2407 |
# ==============================
|
| 2408 |
# READ SPRITE METADATA
|
| 2409 |
# ==============================
|
|
|
|
|
|
|
|
|
|
| 2410 |
sprite_ids, sprite_base64 = [], []
|
| 2411 |
for sid, sprite in sprites_data.items():
|
| 2412 |
sprite_ids.append(sid)
|
|
|
|
| 2413 |
sprite_base64.append(sprite["base64"])
|
| 2414 |
|
| 2415 |
sprite_images_bytes = []
|
|
@@ -2419,106 +2497,99 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2419 |
img.save(buffer, format="PNG")
|
| 2420 |
buffer.seek(0)
|
| 2421 |
sprite_images_bytes.append(buffer)
|
| 2422 |
-
|
| 2423 |
# =========================================
|
| 2424 |
# Build the list of all candidate images
|
| 2425 |
-
# (REPLACED: now loaded from FAISS paths JSON)
|
| 2426 |
# =========================================
|
| 2427 |
-
|
| 2428 |
-
|
| 2429 |
-
|
| 2430 |
-
|
| 2431 |
-
|
| 2432 |
-
|
| 2433 |
-
|
| 2434 |
-
|
| 2435 |
-
|
| 2436 |
-
|
| 2437 |
-
|
| 2438 |
-
|
| 2439 |
-
|
| 2440 |
-
|
| 2441 |
-
|
| 2442 |
-
|
| 2443 |
-
|
| 2444 |
-
|
| 2445 |
-
|
| 2446 |
-
|
| 2447 |
-
|
| 2448 |
-
|
| 2449 |
-
|
| 2450 |
-
|
| 2451 |
-
|
| 2452 |
-
|
| 2453 |
-
|
| 2454 |
-
|
| 2455 |
-
|
| 2456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2457 |
|
| 2458 |
# -----------------------------------------
|
| 2459 |
-
# Load
|
| 2460 |
-
# and perform search for each sprite image
|
| 2461 |
# -----------------------------------------
|
| 2462 |
-
|
| 2463 |
-
|
| 2464 |
-
|
| 2465 |
-
|
| 2466 |
-
|
| 2467 |
-
|
| 2468 |
-
|
| 2469 |
-
|
| 2470 |
-
|
| 2471 |
-
|
| 2472 |
-
|
| 2473 |
-
|
| 2474 |
-
|
| 2475 |
-
|
| 2476 |
-
|
| 2477 |
-
|
| 2478 |
-
|
| 2479 |
-
|
| 2480 |
-
|
| 2481 |
-
|
| 2482 |
-
|
| 2483 |
-
|
| 2484 |
-
|
| 2485 |
-
|
| 2486 |
-
# ensure float32 and contiguous
|
| 2487 |
-
sprite_embeddings = np.ascontiguousarray(sprite_embeddings.astype(np.float32))
|
| 2488 |
-
|
| 2489 |
-
# number of neighbors to request (we only need the top-1 to emulate previous behavior)
|
| 2490 |
-
k = 1
|
| 2491 |
-
logger.info(f"Performing FAISS search (k={k}) for all sprites...")
|
| 2492 |
-
distances, indices = index.search(sprite_embeddings, k) # distances.shape = (N, k); indices.shape = (N, k)
|
| 2493 |
-
# take top-1 index for each sprite
|
| 2494 |
-
if indices is not None and indices.shape[0] > 0:
|
| 2495 |
-
most_similar_indices = indices[:, 0].tolist()
|
| 2496 |
-
else:
|
| 2497 |
-
most_similar_indices = []
|
| 2498 |
-
except Exception as e:
|
| 2499 |
-
logger.error(f"FAISS / SentenceTransformer matching failed: {e}. Falling back to simple in-memory similarity if possible.")
|
| 2500 |
-
# fallback: if we previously loaded embedding_json, use that in-memory approach (cosine)
|
| 2501 |
-
try:
|
| 2502 |
-
if 'embedding_json' not in locals():
|
| 2503 |
-
with open(f"{BLOCKS_DIR}/embed.json", "r", encoding="utf-8") as f:
|
| 2504 |
-
embedding_json = json.load(f)
|
| 2505 |
-
# build img_matrix from embedding_json entries (assumes "embeddings" key)
|
| 2506 |
-
img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
|
| 2507 |
-
# compute embeddings for sprites using any available embed function (init_dinov2/embed_bytesio_list previously used)
|
| 2508 |
-
init_dinov2()
|
| 2509 |
-
sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8)
|
| 2510 |
-
sprite_matrix = l2_normalize_rows(sprite_matrix)
|
| 2511 |
-
img_matrix = l2_normalize_rows(img_matrix)
|
| 2512 |
-
similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 2513 |
-
most_similar_indices = np.argmax(similarity, axis=1).tolist()
|
| 2514 |
-
logger.info("Fallback in-memory similarity search succeeded.")
|
| 2515 |
-
except Exception as e2:
|
| 2516 |
-
logger.error(f"Fallback similarity also failed: {e2}")
|
| 2517 |
-
most_similar_indices = []
|
| 2518 |
-
|
| 2519 |
-
# Ensure we have a list with same length as number of sprites (or empty)
|
| 2520 |
-
if most_similar_indices is None:
|
| 2521 |
-
most_similar_indices = []
|
| 2522 |
|
| 2523 |
# =========================================
|
| 2524 |
# Copy matched sprite assets + collect data
|
|
@@ -2527,17 +2598,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2527 |
copied_folders = set()
|
| 2528 |
|
| 2529 |
for sprite_idx, matched_idx in enumerate(most_similar_indices):
|
| 2530 |
-
|
| 2531 |
-
continue
|
| 2532 |
-
|
| 2533 |
-
# protect against out-of-range indices
|
| 2534 |
-
try:
|
| 2535 |
-
matched_image_path = folder_image_paths[int(matched_idx)]
|
| 2536 |
-
except Exception as e:
|
| 2537 |
-
logger.warning(f"Matched index {matched_idx} invalid: {e}")
|
| 2538 |
-
continue
|
| 2539 |
-
|
| 2540 |
-
matched_image_path = os.path.normpath(str(matched_image_path))
|
| 2541 |
matched_folder = os.path.dirname(matched_image_path)
|
| 2542 |
|
| 2543 |
# CHANGED: use our new normalized sprite_base_path
|
|
@@ -2554,17 +2615,14 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2554 |
logger.warning(f"No sprite.json in {matched_folder}")
|
| 2555 |
continue
|
| 2556 |
|
| 2557 |
-
with open(sprite_json_path, 'r'
|
| 2558 |
sprite_info = json.load(f)
|
| 2559 |
# copy all non‐matched files
|
| 2560 |
for fname in os.listdir(matched_folder):
|
| 2561 |
if fname in (os.path.basename(matched_image_path), 'sprite.json'):
|
| 2562 |
continue
|
| 2563 |
-
|
| 2564 |
-
|
| 2565 |
-
os.path.join(project_folder, fname))
|
| 2566 |
-
except Exception as e:
|
| 2567 |
-
logger.error(f"Failed to copy sprite asset {fname} from {matched_folder}: {e}")
|
| 2568 |
project_data.append(sprite_info)
|
| 2569 |
|
| 2570 |
# =========================================
|
|
@@ -2573,20 +2631,10 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2573 |
backdrop_data = []
|
| 2574 |
copied_backdrop_folders = set()
|
| 2575 |
for backdrop_idx, matched_idx in enumerate(most_similar_indices):
|
| 2576 |
-
|
| 2577 |
-
continue
|
| 2578 |
-
|
| 2579 |
-
# protect against out-of-range indices
|
| 2580 |
-
try:
|
| 2581 |
-
matched_image_path = folder_image_paths[int(matched_idx)]
|
| 2582 |
-
except Exception as e:
|
| 2583 |
-
logger.warning(f"Matched index {matched_idx} invalid for backdrop loop: {e}")
|
| 2584 |
-
continue
|
| 2585 |
-
|
| 2586 |
-
matched_image_path = os.path.normpath(str(matched_image_path))
|
| 2587 |
matched_folder = os.path.dirname(matched_image_path)
|
| 2588 |
matched_filename = os.path.basename(matched_image_path)
|
| 2589 |
-
|
| 2590 |
# CHANGED: use our new normalized backdrop_base_path
|
| 2591 |
if not matched_folder.startswith(backdrop_base_path):
|
| 2592 |
continue
|
|
@@ -2607,11 +2655,14 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2607 |
logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
|
| 2608 |
except Exception as e:
|
| 2609 |
logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
|
| 2610 |
-
|
| 2611 |
# copy non‐matched files
|
| 2612 |
for fname in os.listdir(matched_folder):
|
|
|
|
| 2613 |
if fname in {matched_filename, 'project.json'}:
|
| 2614 |
continue
|
|
|
|
|
|
|
| 2615 |
src = os.path.join(matched_folder, fname)
|
| 2616 |
dst = os.path.join(project_folder, fname)
|
| 2617 |
if os.path.isfile(src):
|
|
@@ -2624,7 +2675,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2624 |
# append the stage‐target from its project.json
|
| 2625 |
pj = os.path.join(matched_folder, 'project.json')
|
| 2626 |
if os.path.exists(pj):
|
| 2627 |
-
with open(pj, 'r'
|
| 2628 |
bd_json = json.load(f)
|
| 2629 |
for tgt in bd_json.get("targets", []):
|
| 2630 |
if tgt.get("isStage"):
|
|
@@ -2632,6 +2683,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2632 |
else:
|
| 2633 |
logger.warning(f"No project.json in {matched_folder}")
|
| 2634 |
|
|
|
|
| 2635 |
# =========================================
|
| 2636 |
# Merge into final Scratch project.json
|
| 2637 |
# =========================================
|
|
@@ -2659,24 +2711,24 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2659 |
if key not in seen_costumes:
|
| 2660 |
seen_costumes.add(key)
|
| 2661 |
all_costumes.append(costume)
|
| 2662 |
-
|
| 2663 |
if i == 0:
|
| 2664 |
sounds = bd.get("sounds", [])
|
| 2665 |
stage_obj={
|
| 2666 |
"isStage": True,
|
| 2667 |
"name": "Stage",
|
| 2668 |
"objName": "Stage",
|
| 2669 |
-
"variables": {},
|
| 2670 |
-
"lists": {},
|
| 2671 |
"broadcasts": {},
|
| 2672 |
-
"blocks": {},
|
| 2673 |
"comments": {},
|
| 2674 |
"currentCostume": 1 if len(all_costumes) > 1 else 0,
|
| 2675 |
"costumes": all_costumes,
|
| 2676 |
"sounds": sounds,
|
| 2677 |
-
"volume": 100,
|
| 2678 |
"layerOrder": 0,
|
| 2679 |
-
"tempo": 60,
|
| 2680 |
"videoTransparency": 50,
|
| 2681 |
"videoState": "on",
|
| 2682 |
"textToSpeechLanguage": None
|
|
@@ -2697,7 +2749,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2697 |
logger.info(f"✅ Default backdrop sound copied to project: {default_backdrop_sound_name}")
|
| 2698 |
except Exception as e:
|
| 2699 |
logger.error(f"❌ Failed to copy default backdrop: {e}")
|
| 2700 |
-
|
| 2701 |
stage_obj={
|
| 2702 |
"isStage": True,
|
| 2703 |
"name": "Stage",
|
|
@@ -2738,7 +2790,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
|
| 2738 |
}
|
| 2739 |
final_project["targets"].insert(0, stage_obj)
|
| 2740 |
|
| 2741 |
-
with open(project_json_path, 'w'
|
| 2742 |
json.dump(final_project, f, indent=2)
|
| 2743 |
|
| 2744 |
return project_json_path
|
|
|
|
| 32 |
import faiss
|
| 33 |
from sentence_transformers import SentenceTransformer
|
| 34 |
|
| 35 |
+
# --- Config (tune threads as needed) ---
|
| 36 |
+
DINOV2_MODEL = "facebook/dinov2-small" # small = best CPU latency/quality tradeoff
|
| 37 |
+
DEVICE = torch.device("cpu")
|
| 38 |
+
torch.set_num_threads(4) # tune for your CPU
|
| 39 |
|
| 40 |
+
# --- Globals for single-shot model load ---
|
| 41 |
+
_dinov2_processor = None
|
| 42 |
+
_dinov2_model = None
|
| 43 |
|
| 44 |
# os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
|
| 45 |
# class ChatOpenRouter(ChatOpenAI):
|
|
|
|
| 388 |
# def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
|
| 389 |
# norm = np.linalg.norm(a, axis=1, keepdims=True)
|
| 390 |
# return a / (norm + eps)
|
| 391 |
+
import torch
|
| 392 |
+
from transformers import AutoImageProcessor, AutoModel
|
| 393 |
+
import numpy as np
|
| 394 |
+
from PIL import Image
|
| 395 |
+
from pathlib import Path
|
| 396 |
+
from io import BytesIO
|
| 397 |
+
import json
|
| 398 |
+
def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
    """
    Populate the module-level DINOv2 processor and model on first use.

    Does nothing when both ``_dinov2_processor`` and ``_dinov2_model`` are
    already set. Otherwise loads them with ``from_pretrained(model_name)``,
    switches the model to eval mode and moves it to ``device``.

    Args:
        model_name: Identifier passed to ``from_pretrained``.
        device: Torch device the model is moved to.
    """
    global _dinov2_processor, _dinov2_model

    # Guard clause: both halves already cached, nothing to load.
    if _dinov2_processor is not None and _dinov2_model is not None:
        return

    _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
    _dinov2_model = AutoModel.from_pretrained(model_name)
    _dinov2_model.eval()
    _dinov2_model.to(device)
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def embed_bytesio_list(bytesio_list, batch_size: int = 8):
    """
    Embed a list of in-memory images with the cached DINOv2 model.

    Each image is flattened onto a white RGB canvas (the alpha channel is used
    as the paste mask for RGBA images), run through the model in batches, and
    the first token of every output sequence is L2-normalized.

    Args:
        bytesio_list: Iterable of BytesIO objects, each containing one image.
        batch_size: Number of images per forward pass; must be >= 1.

    Returns:
        np.ndarray of shape (N, D), dtype float32, with L2-normalized rows.
        For empty input the shape is (0, hidden_size).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently return no
        # embeddings for a non-empty input.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    if _dinov2_processor is None or _dinov2_model is None:
        init_dinov2()

    # Follow the device the model actually lives on: init_dinov2() accepts a
    # custom device, so the module-level DEVICE may not match it.
    model_device = next(_dinov2_model.parameters()).device

    imgs = []
    for b in bytesio_list:
        with Image.open(b) as original_img:
            # Create a new image with a white background in RGB mode
            final_img = Image.new("RGB", original_img.size, (255, 255, 255))
            # Paste the original onto the white background, using the alpha
            # channel as a mask if it exists
            if original_img.mode == 'RGBA':
                final_img.paste(original_img, mask=original_img.split()[-1])
            else:
                final_img.paste(original_img)
            # final_img is a fresh in-memory image, so no extra copy is needed
            imgs.append(final_img)

    embs = []
    for start in range(0, len(imgs), batch_size):
        batch = imgs[start: start + batch_size]
        inputs = _dinov2_processor(images=batch, return_tensors="pt")
        inputs = {k: v.to(model_device) for k, v in inputs.items()}
        with torch.no_grad():
            out = _dinov2_model(**inputs)
        # First token of each sequence (the CLS token for DINOv2): (B, D)
        cls_tokens = out.last_hidden_state[:, 0, :]
        cls_tokens = torch.nn.functional.normalize(cls_tokens, p=2, dim=1)
        embs.append(cls_tokens.cpu().numpy())

    if not embs:
        return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)

    return np.vstack(embs).astype(np.float32)
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """
    Scale every row of ``a`` by its Euclidean length.

    ``eps`` is added to each row norm before dividing, so an all-zero row
    yields zeros instead of a division by zero.
    """
    return np.divide(a, np.linalg.norm(a, axis=1, keepdims=True) + eps)
|
| 452 |
|
| 453 |
+
|
| 454 |
# Helper function to load the block catalog from a JSON file
|
| 455 |
def _load_block_catalog(block_type: str) -> Dict:
|
| 456 |
"""
|
|
|
|
| 2367 |
state["processing"]= False
|
| 2368 |
return state
|
| 2369 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2370 |
def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
| 2371 |
''' Extract images from PDF and generate structured sprite JSON '''
|
| 2372 |
+
manipulated_json = {}
|
| 2373 |
+
img_elements = []
|
| 2374 |
try:
|
| 2375 |
# {
|
| 2376 |
# pdf_path = Path(pdf_path)
|
|
|
|
| 2378 |
# pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
|
| 2379 |
# print("-------------------------------pdf_filename-------------------------------",pdf_filename)
|
| 2380 |
# print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
|
|
|
|
| 2381 |
if isinstance(pdf_stream, io.BytesIO):
|
| 2382 |
# use a random ID since there's no filename
|
| 2383 |
pdf_id = uuid.uuid4().hex
|
| 2384 |
else:
|
| 2385 |
pdf_id = os.path.splitext(os.path.basename(pdf_stream))[0]
|
| 2386 |
+
|
| 2387 |
# extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
|
| 2388 |
# json_subdir = JSON_DIR / pdf_filename
|
| 2389 |
# extracted_image_subdir.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 2398 |
# print("-------------------------------final_json_path-------------------------------",final_json_path)
|
| 2399 |
# print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
|
| 2400 |
|
|
|
|
| 2401 |
try:
|
| 2402 |
elements = partition_pdf(
|
| 2403 |
# filename=str(pdf_path), # partition_pdf might expect a string
|
|
|
|
| 2406 |
extract_image_block_types=["Image"],
|
| 2407 |
hi_res_model_name="yolox",
|
| 2408 |
extract_image_block_to_payload=True,
|
| 2409 |
+
# ocr_languages=ocr_lang,
|
| 2410 |
+
# extract_images_in_pdf=False,
|
| 2411 |
+
# extract_image_block_output_dir=r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\pdf_output"
|
| 2412 |
)
|
| 2413 |
print(f"ELEMENTS")
|
| 2414 |
except Exception as e:
|
| 2415 |
raise RuntimeError(
|
| 2416 |
f"❌ Failed to extract images from PDF: {str(e)}")
|
| 2417 |
+
|
| 2418 |
file_elements = [element.to_dict() for element in elements]
|
| 2419 |
+
print(f"========== file elements: \n{file_elements}")
|
| 2420 |
+
|
| 2421 |
#{
|
| 2422 |
# try:
|
| 2423 |
# with open(output_json_path, "w") as f:
|
|
|
|
| 2425 |
# for element in elements], f, indent=4)
|
| 2426 |
# except Exception as e:
|
| 2427 |
# raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
|
| 2428 |
+
|
| 2429 |
# try:
|
| 2430 |
# # Display extracted images
|
| 2431 |
# with open(output_json_path, 'r') as file:
|
| 2432 |
# file_elements = json.load(file)
|
| 2433 |
# except Exception as e:
|
| 2434 |
+
# raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
|
| 2435 |
# }
|
| 2436 |
+
|
| 2437 |
sprite_count = 1
|
| 2438 |
for el in file_elements:
|
| 2439 |
img_b64 = el["metadata"].get("image_base64")
|
| 2440 |
if not img_b64:
|
| 2441 |
+
continue
|
| 2442 |
+
|
| 2443 |
manipulated_json[f"Sprite {sprite_count}"] = {
|
| 2444 |
# "id":auto_id,
|
| 2445 |
# "name": name,
|
|
|
|
| 2453 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
| 2454 |
|
| 2455 |
# def similarity_matching(input_json_path: str, project_folder: str) -> str:
|
| 2456 |
+
# # --- Config (tune threads as needed) ---
|
| 2457 |
+
# DINOV2_MODEL = "facebook/dinov2-small" # small = best CPU latency/quality tradeoff
|
| 2458 |
+
# DEVICE = torch.device("cpu")
|
| 2459 |
+
# torch.set_num_threads(4) # tune for your CPU
|
| 2460 |
+
|
| 2461 |
+
# --- Globals for single-shot model load ---
|
| 2462 |
+
# _dinov2_processor = None
|
| 2463 |
+
# _dinov2_model = None
|
| 2464 |
+
|
| 2465 |
+
|
| 2466 |
+
|
| 2467 |
+
|
| 2468 |
def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
| 2469 |
logger.info("🔍 Running similarity matching…")
|
| 2470 |
os.makedirs(project_folder, exist_ok=True)
|
|
|
|
| 2481 |
# ==============================
|
| 2482 |
# READ SPRITE METADATA
|
| 2483 |
# ==============================
|
| 2484 |
+
# with open(input_json_path, 'r') as f:
|
| 2485 |
+
# sprites_data = json.load(f)
|
| 2486 |
+
|
| 2487 |
sprite_ids, sprite_base64 = [], []
|
| 2488 |
for sid, sprite in sprites_data.items():
|
| 2489 |
sprite_ids.append(sid)
|
| 2490 |
+
# texts.append("This is " + sprite.get("description", sprite.get("name", "")))
|
| 2491 |
sprite_base64.append(sprite["base64"])
|
| 2492 |
|
| 2493 |
sprite_images_bytes = []
|
|
|
|
| 2497 |
img.save(buffer, format="PNG")
|
| 2498 |
buffer.seek(0)
|
| 2499 |
sprite_images_bytes.append(buffer)
|
| 2500 |
+
|
| 2501 |
# =========================================
|
| 2502 |
# Build the list of all candidate images
|
|
|
|
| 2503 |
# =========================================
|
| 2504 |
+
folder_image_paths = [
|
| 2505 |
+
BACKDROP_DIR/"Baseball 2.sb3"/"7be1f5b3e682813dac1f297e52ff7dca.png",
|
| 2506 |
+
BACKDROP_DIR/"Beach Malibu.sb3"/"050615fe992a00d6af0e664e497ebf53.png",
|
| 2507 |
+
BACKDROP_DIR/"Bedroom 3.sb3"/"8cc0b88d53345b3e337e8f028a32a4e7.png",
|
| 2508 |
+
BACKDROP_DIR/"Blue Sky.sb3"/"e7c147730f19d284bcd7b3f00af19bb6.png",
|
| 2509 |
+
BACKDROP_DIR/"Castle 2.sb3"/"951765ee7f7370f120c9df20b577c22f.png",
|
| 2510 |
+
BACKDROP_DIR/"Colorful City.sb3"/"04d18ddd1b85f0ea30beb14b8da49f60.png",
|
| 2511 |
+
BACKDROP_DIR/"Hall.sb3"/"ea86ca30b346f27ca5faf1254f6a31e3.png",
|
| 2512 |
+
BACKDROP_DIR/"Jungle.sb3"/"f4f908da19e2753f3ed679d7b37650ca.png",
|
| 2513 |
+
BACKDROP_DIR/"Soccer.sb3"/"04a63154f04b09494354090f7cc2f1b9.png",
|
| 2514 |
+
BACKDROP_DIR/"Theater.sb3"/"c2b097bc5cdb6a14ef5485202bc5ee76.png",
|
| 2515 |
+
|
| 2516 |
+
SPRITE_DIR/"Batter.sprite3"/"592ee9ab2aeefe65cb4fb95fcd046f33.png",
|
| 2517 |
+
SPRITE_DIR/"Batter.sprite3"/"9d193bef6e3d6d8eba6d1470b8bf9351.png",
|
| 2518 |
+
SPRITE_DIR/"Batter.sprite3"/"baseball_sprite_motion_1.png",
|
| 2519 |
+
SPRITE_DIR/"Batter.sprite3"/"bd4fc003528acfa847e45ff82f346eee.png",
|
| 2520 |
+
SPRITE_DIR/"Batter.sprite3"/"fdfde4bcbaca0f68e83fdf3f4ef0c660.png",
|
| 2521 |
+
SPRITE_DIR/"Bear.sprite3"/"6f303e972f33fcb7ef36d0d8012d0975.png",
|
| 2522 |
+
SPRITE_DIR/"Bear.sprite3"/"bear_motion_2.png",
|
| 2523 |
+
SPRITE_DIR/"Bear.sprite3"/"deef1eaa96d550ae6fc11524a1935024.png",
|
| 2524 |
+
SPRITE_DIR/"Beetle.sprite3"/"46d0dfd4ae7e9bfe3a6a2e35a4905eae.png",
|
| 2525 |
+
SPRITE_DIR/"Butterfly 1.sprite3"/"34b76c1835c6a7fc2c47956e49bb0f52.png",
|
| 2526 |
+
SPRITE_DIR/"Butterfly 1.sprite3"/"49c9f952007d870a046cff93b6e5e098.png",
|
| 2527 |
+
SPRITE_DIR/"Butterfly 1.sprite3"/"fe98df7367e314d9640bfaa54fc239be.png",
|
| 2528 |
+
SPRITE_DIR/"Cat.sprite3"/"0fb9be3e8397c983338cb71dc84d0b25.png",
|
| 2529 |
+
SPRITE_DIR/"Cat.sprite3"/"bcf454acf82e4504149f7ffe07081dbc.png",
|
| 2530 |
+
SPRITE_DIR/"Centaur.sprite3"/"2373556e776cad3ba4d6ee04fc34550b.png",
|
| 2531 |
+
SPRITE_DIR/"Centaur.sprite3"/"c00ffa6c5dd0baf9f456b897ff974377.png",
|
| 2532 |
+
SPRITE_DIR/"Centaur.sprite3"/"d722329bd9373ad80625e5be6d52f3ed.png",
|
| 2533 |
+
SPRITE_DIR/"Centaur.sprite3"/"d7aa990538915b7ef1f496d7e8486ade.png",
|
| 2534 |
+
SPRITE_DIR/"City Bus.sprite3"/"7d7e26014a346b894db8ab1819f2167f.png",
|
| 2535 |
+
SPRITE_DIR/"City Bus.sprite3"/"e9694adbff9422363e2ea03166015393.png",
|
| 2536 |
+
SPRITE_DIR/"Crab.sprite3"/"49839aa1b0feed02a3c759db5f8dee71.png",
|
| 2537 |
+
SPRITE_DIR/"Crab.sprite3"/"bear_element.png",
|
| 2538 |
+
SPRITE_DIR/"Crab.sprite3"/"f7cdd2acbc6d7559d33be8675059c79e.png",
|
| 2539 |
+
SPRITE_DIR/"Glow-G.sprite3"/"56839bc48957869d980c6f9b6f5a2a91.png",
|
| 2540 |
+
SPRITE_DIR/"Jordyn.sprite3"/"00c8c464c19460df693f8d5ae69afdab.png",
|
| 2541 |
+
SPRITE_DIR/"Jordyn.sprite3"/"768c4601174f0dfcb96b3080ccc3a192.png",
|
| 2542 |
+
SPRITE_DIR/"Jordyn.sprite3"/"a7cc1e5f02b58ecc8095cfc18eef0289.png",
|
| 2543 |
+
SPRITE_DIR/"Jordyn.sprite3"/"db4d97cbf24e2b8af665bfbf06f67fa0.png",
|
| 2544 |
+
SPRITE_DIR/"Soccer Ball.sprite3"/"5d973d7a3a8be3f3bd6e1cd0f73c32b5.png",
|
| 2545 |
+
SPRITE_DIR/"Soccer Ball.sprite3"/"cat_football.png",
|
| 2546 |
+
SPRITE_DIR/"Star.sprite3"/"551629f2a64c1f3703e57aaa133effa6.png",
|
| 2547 |
+
SPRITE_DIR/"Wizard.sprite3"/"55ba51188af86ca16ef30267e874c1ed.png",
|
| 2548 |
+
SPRITE_DIR/"Wizard.sprite3"/"91d495085eb4d02a375c42f6318071e7.png",
|
| 2549 |
+
SPRITE_DIR/"Wizard.sprite3"/"df943c9894ee4b9df8c5893ce30c2a5f.png",
|
| 2550 |
+
|
| 2551 |
+
# CODE_BLOCKS_DIR/"client_code_block_1.jpg",
|
| 2552 |
+
# CODE_BLOCKS_DIR/"client_code_block_2.jpg",
|
| 2553 |
+
CODE_BLOCKS_DIR/"script1.JPG",
|
| 2554 |
+
CODE_BLOCKS_DIR/"script2.JPG",
|
| 2555 |
+
CODE_BLOCKS_DIR/"script3.JPG",
|
| 2556 |
+
CODE_BLOCKS_DIR/"script4.JPG",
|
| 2557 |
+
CODE_BLOCKS_DIR/"script5.JPG",
|
| 2558 |
+
CODE_BLOCKS_DIR/"script6.JPG",
|
| 2559 |
+
CODE_BLOCKS_DIR/"script7.JPG",
|
| 2560 |
+
CODE_BLOCKS_DIR/"script8.JPG",
|
| 2561 |
+
CODE_BLOCKS_DIR/"script9.JPG",
|
| 2562 |
+
CODE_BLOCKS_DIR/"static_white.png"]
|
| 2563 |
+
folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
|
| 2564 |
+
# =========================================
|
| 2565 |
|
| 2566 |
# -----------------------------------------
|
| 2567 |
+
# Load reference embeddings from JSON
|
|
|
|
| 2568 |
# -----------------------------------------
|
| 2569 |
+
with open(f"{BLOCKS_DIR}/dinov2_embeddings.json", "r") as f:
|
| 2570 |
+
embedding_json = json.load(f)
|
| 2571 |
+
|
| 2572 |
+
# ============================== #
|
| 2573 |
+
# EMBED SPRITE IMAGES #
|
| 2574 |
+
# ============================== #
|
| 2575 |
+
# ensure model is initialized (fast no-op after first call)
|
| 2576 |
+
init_dinov2()
|
| 2577 |
+
|
| 2578 |
+
# embed the incoming sprite BytesIO images (same data structure you already use)
|
| 2579 |
+
sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8) # shape (N, D)
|
| 2580 |
+
|
| 2581 |
+
# load reference embeddings from JSON (they must be numeric lists)
|
| 2582 |
+
img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
|
| 2583 |
+
|
| 2584 |
+
# normalize both sides (important — stored embeddings may not be normalized)
|
| 2585 |
+
sprite_matrix = l2_normalize_rows(sprite_matrix)
|
| 2586 |
+
img_matrix = l2_normalize_rows(img_matrix)
|
| 2587 |
+
|
| 2588 |
+
# =========================================
|
| 2589 |
+
# Compute similarities & pick best match
|
| 2590 |
+
# =========================================
|
| 2591 |
+
similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 2592 |
+
most_similar_indices = np.argmax(similarity, axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2593 |
|
| 2594 |
# =========================================
|
| 2595 |
# Copy matched sprite assets + collect data
|
|
|
|
| 2598 |
copied_folders = set()
|
| 2599 |
|
| 2600 |
for sprite_idx, matched_idx in enumerate(most_similar_indices):
|
| 2601 |
+
matched_image_path = folder_image_paths[matched_idx]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2602 |
matched_folder = os.path.dirname(matched_image_path)
|
| 2603 |
|
| 2604 |
# CHANGED: use our new normalized sprite_base_path
|
|
|
|
| 2615 |
logger.warning(f"No sprite.json in {matched_folder}")
|
| 2616 |
continue
|
| 2617 |
|
| 2618 |
+
with open(sprite_json_path, 'r') as f:
|
| 2619 |
sprite_info = json.load(f)
|
| 2620 |
# copy all non‐matched files
|
| 2621 |
for fname in os.listdir(matched_folder):
|
| 2622 |
if fname in (os.path.basename(matched_image_path), 'sprite.json'):
|
| 2623 |
continue
|
| 2624 |
+
shutil.copy2(os.path.join(matched_folder, fname),
|
| 2625 |
+
os.path.join(project_folder, fname))
|
|
|
|
|
|
|
|
|
|
| 2626 |
project_data.append(sprite_info)
|
| 2627 |
|
| 2628 |
# =========================================
|
|
|
|
| 2631 |
backdrop_data = []
|
| 2632 |
copied_backdrop_folders = set()
|
| 2633 |
for backdrop_idx, matched_idx in enumerate(most_similar_indices):
|
| 2634 |
+
matched_image_path = folder_image_paths[matched_idx]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2635 |
matched_folder = os.path.dirname(matched_image_path)
|
| 2636 |
matched_filename = os.path.basename(matched_image_path)
|
| 2637 |
+
|
| 2638 |
# CHANGED: use our new normalized backdrop_base_path
|
| 2639 |
if not matched_folder.startswith(backdrop_base_path):
|
| 2640 |
continue
|
|
|
|
| 2655 |
logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
|
| 2656 |
except Exception as e:
|
| 2657 |
logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
|
| 2658 |
+
|
| 2659 |
# copy non‐matched files
|
| 2660 |
for fname in os.listdir(matched_folder):
|
| 2661 |
+
# if fname in (os.path.basename(matched_image_path), 'project.json'):
|
| 2662 |
if fname in {matched_filename, 'project.json'}:
|
| 2663 |
continue
|
| 2664 |
+
# shutil.copy2(os.path.join(matched_folder, fname),
|
| 2665 |
+
# os.path.join(project_folder, fname))
|
| 2666 |
src = os.path.join(matched_folder, fname)
|
| 2667 |
dst = os.path.join(project_folder, fname)
|
| 2668 |
if os.path.isfile(src):
|
|
|
|
| 2675 |
# append the stage‐target from its project.json
|
| 2676 |
pj = os.path.join(matched_folder, 'project.json')
|
| 2677 |
if os.path.exists(pj):
|
| 2678 |
+
with open(pj, 'r') as f:
|
| 2679 |
bd_json = json.load(f)
|
| 2680 |
for tgt in bd_json.get("targets", []):
|
| 2681 |
if tgt.get("isStage"):
|
|
|
|
| 2683 |
else:
|
| 2684 |
logger.warning(f"No project.json in {matched_folder}")
|
| 2685 |
|
| 2686 |
+
|
| 2687 |
# =========================================
|
| 2688 |
# Merge into final Scratch project.json
|
| 2689 |
# =========================================
|
|
|
|
| 2711 |
if key not in seen_costumes:
|
| 2712 |
seen_costumes.add(key)
|
| 2713 |
all_costumes.append(costume)
|
| 2714 |
+
|
| 2715 |
if i == 0:
|
| 2716 |
sounds = bd.get("sounds", [])
|
| 2717 |
stage_obj={
|
| 2718 |
"isStage": True,
|
| 2719 |
"name": "Stage",
|
| 2720 |
"objName": "Stage",
|
| 2721 |
+
"variables": {},
|
| 2722 |
+
"lists": {},
|
| 2723 |
"broadcasts": {},
|
| 2724 |
+
"blocks": {},
|
| 2725 |
"comments": {},
|
| 2726 |
"currentCostume": 1 if len(all_costumes) > 1 else 0,
|
| 2727 |
"costumes": all_costumes,
|
| 2728 |
"sounds": sounds,
|
| 2729 |
+
"volume": 100,
|
| 2730 |
"layerOrder": 0,
|
| 2731 |
+
"tempo": 60,
|
| 2732 |
"videoTransparency": 50,
|
| 2733 |
"videoState": "on",
|
| 2734 |
"textToSpeechLanguage": None
|
|
|
|
| 2749 |
logger.info(f"✅ Default backdrop sound copied to project: {default_backdrop_sound_name}")
|
| 2750 |
except Exception as e:
|
| 2751 |
logger.error(f"❌ Failed to copy default backdrop: {e}")
|
| 2752 |
+
|
| 2753 |
stage_obj={
|
| 2754 |
"isStage": True,
|
| 2755 |
"name": "Stage",
|
|
|
|
| 2790 |
}
|
| 2791 |
final_project["targets"].insert(0, stage_obj)
|
| 2792 |
|
| 2793 |
+
with open(project_json_path, 'w') as f:
|
| 2794 |
json.dump(final_project, f, indent=2)
|
| 2795 |
|
| 2796 |
return project_json_path
|