prthm11 committed on
Commit
b00aa34
·
verified ·
1 Parent(s): 8f72830

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +206 -154
app.py CHANGED
@@ -32,14 +32,14 @@ from transformers import AutoImageProcessor, AutoModel
32
  import faiss
33
  from sentence_transformers import SentenceTransformer
34
 
35
- # # --- Config (tune threads as needed) ---
36
- # DINOV2_MODEL = "facebook/dinov2-small" # small = best CPU latency/quality tradeoff
37
- # DEVICE = torch.device("cpu")
38
- # torch.set_num_threads(4) # tune for your CPU
39
 
40
- # # --- Globals for single-shot model load ---
41
- # _dinov2_processor = None
42
- # _dinov2_model = None
43
 
44
  # os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
45
  # class ChatOpenRouter(ChatOpenAI):
@@ -388,7 +388,69 @@ def load_model_and_index():
388
  # def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
389
  # norm = np.linalg.norm(a, axis=1, keepdims=True)
390
  # return a / (norm + eps)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
 
392
  # Helper function to load the block catalog from a JSON file
393
  def _load_block_catalog(block_type: str) -> Dict:
394
  """
@@ -2305,12 +2367,10 @@ def processed_page_node(state: GameState):
2305
  state["processing"]= False
2306
  return state
2307
 
2308
- # Prepare manipulated sprite JSON structure
2309
- manipulated_json = {}
2310
- img_elements = []
2311
- # { changes: "pdf_stream" in place of "pdf_path"
2312
  def extract_images_from_pdf(pdf_stream: io.BytesIO):
2313
  ''' Extract images from PDF and generate structured sprite JSON '''
 
 
2314
  try:
2315
  # {
2316
  # pdf_path = Path(pdf_path)
@@ -2318,13 +2378,12 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
2318
  # pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
2319
  # print("-------------------------------pdf_filename-------------------------------",pdf_filename)
2320
  # print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
2321
-
2322
  if isinstance(pdf_stream, io.BytesIO):
2323
  # use a random ID since there's no filename
2324
  pdf_id = uuid.uuid4().hex
2325
  else:
2326
  pdf_id = os.path.splitext(os.path.basename(pdf_stream))[0]
2327
-
2328
  # extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
2329
  # json_subdir = JSON_DIR / pdf_filename
2330
  # extracted_image_subdir.mkdir(parents=True, exist_ok=True)
@@ -2339,7 +2398,6 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
2339
  # print("-------------------------------final_json_path-------------------------------",final_json_path)
2340
  # print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
2341
 
2342
- # }
2343
  try:
2344
  elements = partition_pdf(
2345
  # filename=str(pdf_path), # partition_pdf might expect a string
@@ -2348,14 +2406,18 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
2348
  extract_image_block_types=["Image"],
2349
  hi_res_model_name="yolox",
2350
  extract_image_block_to_payload=True,
 
 
 
2351
  )
2352
  print(f"ELEMENTS")
2353
  except Exception as e:
2354
  raise RuntimeError(
2355
  f"❌ Failed to extract images from PDF: {str(e)}")
2356
-
2357
  file_elements = [element.to_dict() for element in elements]
2358
-
 
2359
  #{
2360
  # try:
2361
  # with open(output_json_path, "w") as f:
@@ -2363,21 +2425,21 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
2363
  # for element in elements], f, indent=4)
2364
  # except Exception as e:
2365
  # raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
2366
-
2367
  # try:
2368
  # # Display extracted images
2369
  # with open(output_json_path, 'r') as file:
2370
  # file_elements = json.load(file)
2371
  # except Exception as e:
2372
- # raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
2373
  # }
2374
-
2375
  sprite_count = 1
2376
  for el in file_elements:
2377
  img_b64 = el["metadata"].get("image_base64")
2378
  if not img_b64:
2379
- continue
2380
-
2381
  manipulated_json[f"Sprite {sprite_count}"] = {
2382
  # "id":auto_id,
2383
  # "name": name,
@@ -2391,6 +2453,18 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
2391
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
2392
 
2393
  # def similarity_matching(input_json_path: str, project_folder: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
2394
  def similarity_matching(sprites_data: str, project_folder: str) -> str:
2395
  logger.info("🔍 Running similarity matching…")
2396
  os.makedirs(project_folder, exist_ok=True)
@@ -2407,9 +2481,13 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2407
  # ==============================
2408
  # READ SPRITE METADATA
2409
  # ==============================
 
 
 
2410
  sprite_ids, sprite_base64 = [], []
2411
  for sid, sprite in sprites_data.items():
2412
  sprite_ids.append(sid)
 
2413
  sprite_base64.append(sprite["base64"])
2414
 
2415
  sprite_images_bytes = []
@@ -2419,106 +2497,99 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2419
  img.save(buffer, format="PNG")
2420
  buffer.seek(0)
2421
  sprite_images_bytes.append(buffer)
2422
-
2423
  # =========================================
2424
  # Build the list of all candidate images
2425
- # (REPLACED: now loaded from FAISS paths JSON)
2426
  # =========================================
2427
- # NOTE: previously you had a hardcoded `folder_image_paths` sample.
2428
- # We'll load image paths from BLOCKS_DIR/image_paths.json (faiss index companion).
2429
- try:
2430
- index_path = os.path.join(str(BLOCKS_DIR), "faiss_index 2.bin")
2431
- paths_json_path = os.path.join(str(BLOCKS_DIR), "image_paths 2.json")
2432
- # load image paths (these should be full paths or relative paths you used when building the index)
2433
- with open(paths_json_path, "r", encoding="utf-8") as f:
2434
- image_paths = json.load(f)
2435
- folder_image_paths = [os.path.normpath(str(p)) for p in image_paths]
2436
- except Exception as e:
2437
- # Fallback: if image_paths.json not available, fall back to any existing embed.json usage
2438
- logger.error(f"Failed to load FAISS paths JSON from {paths_json_path}: {e}")
2439
- # try to load previous embedding_json and recover its paths (if present)
2440
- try:
2441
- with open(f"{BLOCKS_DIR}/embed.json", "r", encoding="utf-8") as f:
2442
- embedding_json = json.load(f)
2443
- # embedding_json entries expected to have a 'path' or similar field; try common keys
2444
- folder_image_paths = []
2445
- for idx_entry in embedding_json:
2446
- if "path" in idx_entry:
2447
- folder_image_paths.append(os.path.normpath(str(idx_entry["path"])))
2448
- elif "filename" in idx_entry:
2449
- folder_image_paths.append(os.path.normpath(str(idx_entry["filename"])))
2450
- else:
2451
- # If no path available, synthesize a placeholder to avoid crashing later
2452
- folder_image_paths.append(os.path.normpath(str(BLOCKS_DIR)))
2453
- logger.warning("Using embed.json fallback to populate folder_image_paths.")
2454
- except Exception as e2:
2455
- logger.error(f"Failed to load embed.json fallback: {e2}")
2456
- folder_image_paths = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2457
 
2458
  # -----------------------------------------
2459
- # Load FAISS index and SentenceTransformer model
2460
- # and perform search for each sprite image
2461
  # -----------------------------------------
2462
- most_similar_indices = None
2463
- try:
2464
- import faiss # local import in case not available globally
2465
- from sentence_transformers import SentenceTransformer
2466
- # load model (this may take time but ensures correct embedder)
2467
- logger.info("Loading SentenceTransformer 'clip-ViT-L-14' model for embeddings...")
2468
- st_model = SentenceTransformer('clip-ViT-L-14')
2469
-
2470
- logger.info(f"Loading FAISS index from {index_path} ...")
2471
- index = faiss.read_index(index_path)
2472
-
2473
- # Convert BytesIO images to PIL images for batch encoding
2474
- pil_images = []
2475
- for buf in sprite_images_bytes:
2476
- buf.seek(0)
2477
- pil = Image.open(buf).convert("RGB")
2478
- pil_images.append(pil)
2479
-
2480
- if len(pil_images) == 0:
2481
- raise ValueError("No sprite images available to encode.")
2482
-
2483
- logger.info(f"Encoding {len(pil_images)} sprite images with SentenceTransformer...")
2484
- # encode returns numpy array (n_samples, dim)
2485
- sprite_embeddings = st_model.encode(pil_images, batch_size=8, convert_to_numpy=True)
2486
- # ensure float32 and contiguous
2487
- sprite_embeddings = np.ascontiguousarray(sprite_embeddings.astype(np.float32))
2488
-
2489
- # number of neighbors to request (we only need the top-1 to emulate previous behavior)
2490
- k = 1
2491
- logger.info(f"Performing FAISS search (k={k}) for all sprites...")
2492
- distances, indices = index.search(sprite_embeddings, k) # distances.shape = (N, k); indices.shape = (N, k)
2493
- # take top-1 index for each sprite
2494
- if indices is not None and indices.shape[0] > 0:
2495
- most_similar_indices = indices[:, 0].tolist()
2496
- else:
2497
- most_similar_indices = []
2498
- except Exception as e:
2499
- logger.error(f"FAISS / SentenceTransformer matching failed: {e}. Falling back to simple in-memory similarity if possible.")
2500
- # fallback: if we previously loaded embedding_json, use that in-memory approach (cosine)
2501
- try:
2502
- if 'embedding_json' not in locals():
2503
- with open(f"{BLOCKS_DIR}/embed.json", "r", encoding="utf-8") as f:
2504
- embedding_json = json.load(f)
2505
- # build img_matrix from embedding_json entries (assumes "embeddings" key)
2506
- img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
2507
- # compute embeddings for sprites using any available embed function (init_dinov2/embed_bytesio_list previously used)
2508
- init_dinov2()
2509
- sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8)
2510
- sprite_matrix = l2_normalize_rows(sprite_matrix)
2511
- img_matrix = l2_normalize_rows(img_matrix)
2512
- similarity = np.matmul(sprite_matrix, img_matrix.T)
2513
- most_similar_indices = np.argmax(similarity, axis=1).tolist()
2514
- logger.info("Fallback in-memory similarity search succeeded.")
2515
- except Exception as e2:
2516
- logger.error(f"Fallback similarity also failed: {e2}")
2517
- most_similar_indices = []
2518
-
2519
- # Ensure we have a list with same length as number of sprites (or empty)
2520
- if most_similar_indices is None:
2521
- most_similar_indices = []
2522
 
2523
  # =========================================
2524
  # Copy matched sprite assets + collect data
@@ -2527,17 +2598,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2527
  copied_folders = set()
2528
 
2529
  for sprite_idx, matched_idx in enumerate(most_similar_indices):
2530
- if matched_idx is None:
2531
- continue
2532
-
2533
- # protect against out-of-range indices
2534
- try:
2535
- matched_image_path = folder_image_paths[int(matched_idx)]
2536
- except Exception as e:
2537
- logger.warning(f"Matched index {matched_idx} invalid: {e}")
2538
- continue
2539
-
2540
- matched_image_path = os.path.normpath(str(matched_image_path))
2541
  matched_folder = os.path.dirname(matched_image_path)
2542
 
2543
  # CHANGED: use our new normalized sprite_base_path
@@ -2554,17 +2615,14 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2554
  logger.warning(f"No sprite.json in {matched_folder}")
2555
  continue
2556
 
2557
- with open(sprite_json_path, 'r', encoding='utf-8') as f:
2558
  sprite_info = json.load(f)
2559
  # copy all non‐matched files
2560
  for fname in os.listdir(matched_folder):
2561
  if fname in (os.path.basename(matched_image_path), 'sprite.json'):
2562
  continue
2563
- try:
2564
- shutil.copy2(os.path.join(matched_folder, fname),
2565
- os.path.join(project_folder, fname))
2566
- except Exception as e:
2567
- logger.error(f"Failed to copy sprite asset {fname} from {matched_folder}: {e}")
2568
  project_data.append(sprite_info)
2569
 
2570
  # =========================================
@@ -2573,20 +2631,10 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2573
  backdrop_data = []
2574
  copied_backdrop_folders = set()
2575
  for backdrop_idx, matched_idx in enumerate(most_similar_indices):
2576
- if matched_idx is None:
2577
- continue
2578
-
2579
- # protect against out-of-range indices
2580
- try:
2581
- matched_image_path = folder_image_paths[int(matched_idx)]
2582
- except Exception as e:
2583
- logger.warning(f"Matched index {matched_idx} invalid for backdrop loop: {e}")
2584
- continue
2585
-
2586
- matched_image_path = os.path.normpath(str(matched_image_path))
2587
  matched_folder = os.path.dirname(matched_image_path)
2588
  matched_filename = os.path.basename(matched_image_path)
2589
-
2590
  # CHANGED: use our new normalized backdrop_base_path
2591
  if not matched_folder.startswith(backdrop_base_path):
2592
  continue
@@ -2607,11 +2655,14 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2607
  logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
2608
  except Exception as e:
2609
  logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
2610
-
2611
  # copy non‐matched files
2612
  for fname in os.listdir(matched_folder):
 
2613
  if fname in {matched_filename, 'project.json'}:
2614
  continue
 
 
2615
  src = os.path.join(matched_folder, fname)
2616
  dst = os.path.join(project_folder, fname)
2617
  if os.path.isfile(src):
@@ -2624,7 +2675,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2624
  # append the stage‐target from its project.json
2625
  pj = os.path.join(matched_folder, 'project.json')
2626
  if os.path.exists(pj):
2627
- with open(pj, 'r', encoding='utf-8') as f:
2628
  bd_json = json.load(f)
2629
  for tgt in bd_json.get("targets", []):
2630
  if tgt.get("isStage"):
@@ -2632,6 +2683,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2632
  else:
2633
  logger.warning(f"No project.json in {matched_folder}")
2634
 
 
2635
  # =========================================
2636
  # Merge into final Scratch project.json
2637
  # =========================================
@@ -2659,24 +2711,24 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2659
  if key not in seen_costumes:
2660
  seen_costumes.add(key)
2661
  all_costumes.append(costume)
2662
-
2663
  if i == 0:
2664
  sounds = bd.get("sounds", [])
2665
  stage_obj={
2666
  "isStage": True,
2667
  "name": "Stage",
2668
  "objName": "Stage",
2669
- "variables": {},
2670
- "lists": {},
2671
  "broadcasts": {},
2672
- "blocks": {},
2673
  "comments": {},
2674
  "currentCostume": 1 if len(all_costumes) > 1 else 0,
2675
  "costumes": all_costumes,
2676
  "sounds": sounds,
2677
- "volume": 100,
2678
  "layerOrder": 0,
2679
- "tempo": 60,
2680
  "videoTransparency": 50,
2681
  "videoState": "on",
2682
  "textToSpeechLanguage": None
@@ -2697,7 +2749,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2697
  logger.info(f"✅ Default backdrop sound copied to project: {default_backdrop_sound_name}")
2698
  except Exception as e:
2699
  logger.error(f"❌ Failed to copy default backdrop: {e}")
2700
-
2701
  stage_obj={
2702
  "isStage": True,
2703
  "name": "Stage",
@@ -2738,7 +2790,7 @@ def similarity_matching(sprites_data: str, project_folder: str) -> str:
2738
  }
2739
  final_project["targets"].insert(0, stage_obj)
2740
 
2741
- with open(project_json_path, 'w', encoding='utf-8') as f:
2742
  json.dump(final_project, f, indent=2)
2743
 
2744
  return project_json_path
 
32
  import faiss
33
  from sentence_transformers import SentenceTransformer
34
 
35
+ # --- Config (tune threads as needed) ---
36
+ DINOV2_MODEL = "facebook/dinov2-small" # small = best CPU latency/quality tradeoff
37
+ DEVICE = torch.device("cpu")
38
+ torch.set_num_threads(4) # tune for your CPU
39
 
40
+ # --- Globals for single-shot model load ---
41
+ _dinov2_processor = None
42
+ _dinov2_model = None
43
 
44
  # os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
45
  # class ChatOpenRouter(ChatOpenAI):
 
388
  # def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
389
  # norm = np.linalg.norm(a, axis=1, keepdims=True)
390
  # return a / (norm + eps)
391
+ import torch
392
+ from transformers import AutoImageProcessor, AutoModel
393
+ import numpy as np
394
+ from PIL import Image
395
+ from pathlib import Path
396
+ from io import BytesIO
397
+ import json
398
def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
    """
    Load the DINOv2 image processor and model into the module-level globals.

    The load happens only when at least one of the two globals is still
    unset; later calls return immediately without touching them.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            processor and the model.
        device: Torch device the model is moved to after loading.
    """
    global _dinov2_processor, _dinov2_model

    already_loaded = _dinov2_processor is not None and _dinov2_model is not None
    if already_loaded:
        return

    _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
    _dinov2_model = AutoModel.from_pretrained(model_name)
    # Switch to inference mode, then place the weights on the target device.
    _dinov2_model.eval().to(device)
407
+
408
+
409
def embed_bytesio_list(bytesio_list, batch_size: int = 8):
    """
    Embed a list of in-memory images with the lazily loaded DINOv2 model.

    Args:
        bytesio_list: Iterable of BytesIO objects, each containing one image
            that PIL can open.
        batch_size: Number of images pushed through the model per forward
            pass. Must be at least 1.

    Returns:
        np.ndarray of shape (N, D), dtype float32, holding the L2-normalized
        CLS-token embedding of every image. For an empty input the shape is
        (0, hidden_size).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A zero step makes range() fail with an unrelated message and a
        # negative step silently skips every image; reject both up front.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    if _dinov2_processor is None or _dinov2_model is None:
        init_dinov2()

    # Send inputs to the device the model actually lives on. init_dinov2()
    # accepts a custom device, so relying on the module-level DEVICE could
    # put inputs and weights on different devices.
    model_device = next(_dinov2_model.parameters()).device

    imgs = []
    for b in bytesio_list:
        with Image.open(b) as original_img:
            # Composite the image onto a white RGB background.
            final_img = Image.new("RGB", original_img.size, (255, 255, 255))
            if original_img.mode == 'RGBA':
                # Use the alpha channel as the paste mask.
                final_img.paste(original_img, mask=original_img.split()[-1])
            else:
                final_img.paste(original_img)
            # final_img is a freshly allocated image, independent of the
            # file handle being closed here, so no extra copy is needed.
            imgs.append(final_img)

    embs = []
    for i in range(0, len(imgs), batch_size):
        batch = imgs[i: i + batch_size]
        inputs = _dinov2_processor(images=batch, return_tensors="pt")
        inputs = {k: v.to(model_device) for k, v in inputs.items()}
        with torch.no_grad():
            out = _dinov2_model(**inputs)
            cls = out.last_hidden_state[:, 0, :]  # first token per image, (B, D)
            cls = torch.nn.functional.normalize(cls, p=2, dim=1)
        embs.append(cls.cpu().numpy())

    if not embs:
        return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)

    return np.vstack(embs).astype(np.float32)
444
+
445
+
446
def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """
    Row-wise L2 normalization for numpy arrays.

    Args:
        a: Array of vectors. For 2-D (or higher) input the norm is taken
            along axis 1, exactly as before. A 1-D input is treated as a
            single vector and normalized along its only axis instead of
            raising an axis error.
        eps: Small constant added to each norm so that all-zero rows divide
            safely and stay zero.

    Returns:
        Array of the same shape as ``a`` with each row divided by
        ``(its L2 norm + eps)``.
    """
    arr = np.asarray(a)
    # A lone vector has no axis 1; normalize it along axis 0.
    axis = 0 if arr.ndim == 1 else 1
    norm = np.linalg.norm(arr, axis=axis, keepdims=True)
    return arr / (norm + eps)
452
 
453
+
454
  # Helper function to load the block catalog from a JSON file
455
  def _load_block_catalog(block_type: str) -> Dict:
456
  """
 
2367
  state["processing"]= False
2368
  return state
2369
 
 
 
 
 
2370
  def extract_images_from_pdf(pdf_stream: io.BytesIO):
2371
  ''' Extract images from PDF and generate structured sprite JSON '''
2372
+ manipulated_json = {}
2373
+ img_elements = []
2374
  try:
2375
  # {
2376
  # pdf_path = Path(pdf_path)
 
2378
  # pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
2379
  # print("-------------------------------pdf_filename-------------------------------",pdf_filename)
2380
  # print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
 
2381
  if isinstance(pdf_stream, io.BytesIO):
2382
  # use a random ID since there's no filename
2383
  pdf_id = uuid.uuid4().hex
2384
  else:
2385
  pdf_id = os.path.splitext(os.path.basename(pdf_stream))[0]
2386
+
2387
  # extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
2388
  # json_subdir = JSON_DIR / pdf_filename
2389
  # extracted_image_subdir.mkdir(parents=True, exist_ok=True)
 
2398
  # print("-------------------------------final_json_path-------------------------------",final_json_path)
2399
  # print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
2400
 
 
2401
  try:
2402
  elements = partition_pdf(
2403
  # filename=str(pdf_path), # partition_pdf might expect a string
 
2406
  extract_image_block_types=["Image"],
2407
  hi_res_model_name="yolox",
2408
  extract_image_block_to_payload=True,
2409
+ # ocr_languages=ocr_lang,
2410
+ # extract_images_in_pdf=False,
2411
+ # extract_image_block_output_dir=r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\pdf_output"
2412
  )
2413
  print(f"ELEMENTS")
2414
  except Exception as e:
2415
  raise RuntimeError(
2416
  f"❌ Failed to extract images from PDF: {str(e)}")
2417
+
2418
  file_elements = [element.to_dict() for element in elements]
2419
+ print(f"========== file elements: \n{file_elements}")
2420
+
2421
  #{
2422
  # try:
2423
  # with open(output_json_path, "w") as f:
 
2425
  # for element in elements], f, indent=4)
2426
  # except Exception as e:
2427
  # raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
2428
+
2429
  # try:
2430
  # # Display extracted images
2431
  # with open(output_json_path, 'r') as file:
2432
  # file_elements = json.load(file)
2433
  # except Exception as e:
2434
+ # raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
2435
  # }
2436
+
2437
  sprite_count = 1
2438
  for el in file_elements:
2439
  img_b64 = el["metadata"].get("image_base64")
2440
  if not img_b64:
2441
+ continue
2442
+
2443
  manipulated_json[f"Sprite {sprite_count}"] = {
2444
  # "id":auto_id,
2445
  # "name": name,
 
2453
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
2454
 
2455
  # def similarity_matching(input_json_path: str, project_folder: str) -> str:
2456
+ # # --- Config (tune threads as needed) ---
2457
+ # DINOV2_MODEL = "facebook/dinov2-small" # small = best CPU latency/quality tradeoff
2458
+ # DEVICE = torch.device("cpu")
2459
+ # torch.set_num_threads(4) # tune for your CPU
2460
+
2461
+ # --- Globals for single-shot model load ---
2462
+ # _dinov2_processor = None
2463
+ # _dinov2_model = None
2464
+
2465
+
2466
+
2467
+
2468
  def similarity_matching(sprites_data: str, project_folder: str) -> str:
2469
  logger.info("🔍 Running similarity matching…")
2470
  os.makedirs(project_folder, exist_ok=True)
 
2481
  # ==============================
2482
  # READ SPRITE METADATA
2483
  # ==============================
2484
+ # with open(input_json_path, 'r') as f:
2485
+ # sprites_data = json.load(f)
2486
+
2487
  sprite_ids, sprite_base64 = [], []
2488
  for sid, sprite in sprites_data.items():
2489
  sprite_ids.append(sid)
2490
+ # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
2491
  sprite_base64.append(sprite["base64"])
2492
 
2493
  sprite_images_bytes = []
 
2497
  img.save(buffer, format="PNG")
2498
  buffer.seek(0)
2499
  sprite_images_bytes.append(buffer)
2500
+
2501
  # =========================================
2502
  # Build the list of all candidate images
 
2503
  # =========================================
2504
+ folder_image_paths = [
2505
+ BACKDROP_DIR/"Baseball 2.sb3"/"7be1f5b3e682813dac1f297e52ff7dca.png",
2506
+ BACKDROP_DIR/"Beach Malibu.sb3"/"050615fe992a00d6af0e664e497ebf53.png",
2507
+ BACKDROP_DIR/"Bedroom 3.sb3"/"8cc0b88d53345b3e337e8f028a32a4e7.png",
2508
+ BACKDROP_DIR/"Blue Sky.sb3"/"e7c147730f19d284bcd7b3f00af19bb6.png",
2509
+ BACKDROP_DIR/"Castle 2.sb3"/"951765ee7f7370f120c9df20b577c22f.png",
2510
+ BACKDROP_DIR/"Colorful City.sb3"/"04d18ddd1b85f0ea30beb14b8da49f60.png",
2511
+ BACKDROP_DIR/"Hall.sb3"/"ea86ca30b346f27ca5faf1254f6a31e3.png",
2512
+ BACKDROP_DIR/"Jungle.sb3"/"f4f908da19e2753f3ed679d7b37650ca.png",
2513
+ BACKDROP_DIR/"Soccer.sb3"/"04a63154f04b09494354090f7cc2f1b9.png",
2514
+ BACKDROP_DIR/"Theater.sb3"/"c2b097bc5cdb6a14ef5485202bc5ee76.png",
2515
+
2516
+ SPRITE_DIR/"Batter.sprite3"/"592ee9ab2aeefe65cb4fb95fcd046f33.png",
2517
+ SPRITE_DIR/"Batter.sprite3"/"9d193bef6e3d6d8eba6d1470b8bf9351.png",
2518
+ SPRITE_DIR/"Batter.sprite3"/"baseball_sprite_motion_1.png",
2519
+ SPRITE_DIR/"Batter.sprite3"/"bd4fc003528acfa847e45ff82f346eee.png",
2520
+ SPRITE_DIR/"Batter.sprite3"/"fdfde4bcbaca0f68e83fdf3f4ef0c660.png",
2521
+ SPRITE_DIR/"Bear.sprite3"/"6f303e972f33fcb7ef36d0d8012d0975.png",
2522
+ SPRITE_DIR/"Bear.sprite3"/"bear_motion_2.png",
2523
+ SPRITE_DIR/"Bear.sprite3"/"deef1eaa96d550ae6fc11524a1935024.png",
2524
+ SPRITE_DIR/"Beetle.sprite3"/"46d0dfd4ae7e9bfe3a6a2e35a4905eae.png",
2525
+ SPRITE_DIR/"Butterfly 1.sprite3"/"34b76c1835c6a7fc2c47956e49bb0f52.png",
2526
+ SPRITE_DIR/"Butterfly 1.sprite3"/"49c9f952007d870a046cff93b6e5e098.png",
2527
+ SPRITE_DIR/"Butterfly 1.sprite3"/"fe98df7367e314d9640bfaa54fc239be.png",
2528
+ SPRITE_DIR/"Cat.sprite3"/"0fb9be3e8397c983338cb71dc84d0b25.png",
2529
+ SPRITE_DIR/"Cat.sprite3"/"bcf454acf82e4504149f7ffe07081dbc.png",
2530
+ SPRITE_DIR/"Centaur.sprite3"/"2373556e776cad3ba4d6ee04fc34550b.png",
2531
+ SPRITE_DIR/"Centaur.sprite3"/"c00ffa6c5dd0baf9f456b897ff974377.png",
2532
+ SPRITE_DIR/"Centaur.sprite3"/"d722329bd9373ad80625e5be6d52f3ed.png",
2533
+ SPRITE_DIR/"Centaur.sprite3"/"d7aa990538915b7ef1f496d7e8486ade.png",
2534
+ SPRITE_DIR/"City Bus.sprite3"/"7d7e26014a346b894db8ab1819f2167f.png",
2535
+ SPRITE_DIR/"City Bus.sprite3"/"e9694adbff9422363e2ea03166015393.png",
2536
+ SPRITE_DIR/"Crab.sprite3"/"49839aa1b0feed02a3c759db5f8dee71.png",
2537
+ SPRITE_DIR/"Crab.sprite3"/"bear_element.png",
2538
+ SPRITE_DIR/"Crab.sprite3"/"f7cdd2acbc6d7559d33be8675059c79e.png",
2539
+ SPRITE_DIR/"Glow-G.sprite3"/"56839bc48957869d980c6f9b6f5a2a91.png",
2540
+ SPRITE_DIR/"Jordyn.sprite3"/"00c8c464c19460df693f8d5ae69afdab.png",
2541
+ SPRITE_DIR/"Jordyn.sprite3"/"768c4601174f0dfcb96b3080ccc3a192.png",
2542
+ SPRITE_DIR/"Jordyn.sprite3"/"a7cc1e5f02b58ecc8095cfc18eef0289.png",
2543
+ SPRITE_DIR/"Jordyn.sprite3"/"db4d97cbf24e2b8af665bfbf06f67fa0.png",
2544
+ SPRITE_DIR/"Soccer Ball.sprite3"/"5d973d7a3a8be3f3bd6e1cd0f73c32b5.png",
2545
+ SPRITE_DIR/"Soccer Ball.sprite3"/"cat_football.png",
2546
+ SPRITE_DIR/"Star.sprite3"/"551629f2a64c1f3703e57aaa133effa6.png",
2547
+ SPRITE_DIR/"Wizard.sprite3"/"55ba51188af86ca16ef30267e874c1ed.png",
2548
+ SPRITE_DIR/"Wizard.sprite3"/"91d495085eb4d02a375c42f6318071e7.png",
2549
+ SPRITE_DIR/"Wizard.sprite3"/"df943c9894ee4b9df8c5893ce30c2a5f.png",
2550
+
2551
+ # CODE_BLOCKS_DIR/"client_code_block_1.jpg",
2552
+ # CODE_BLOCKS_DIR/"client_code_block_2.jpg",
2553
+ CODE_BLOCKS_DIR/"script1.JPG",
2554
+ CODE_BLOCKS_DIR/"script2.JPG",
2555
+ CODE_BLOCKS_DIR/"script3.JPG",
2556
+ CODE_BLOCKS_DIR/"script4.JPG",
2557
+ CODE_BLOCKS_DIR/"script5.JPG",
2558
+ CODE_BLOCKS_DIR/"script6.JPG",
2559
+ CODE_BLOCKS_DIR/"script7.JPG",
2560
+ CODE_BLOCKS_DIR/"script8.JPG",
2561
+ CODE_BLOCKS_DIR/"script9.JPG",
2562
+ CODE_BLOCKS_DIR/"static_white.png"]
2563
+ folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
2564
+ # =========================================
2565
 
2566
  # -----------------------------------------
2567
+ # Load reference embeddings from JSON
 
2568
  # -----------------------------------------
2569
+ with open(f"{BLOCKS_DIR}/dinov2_embeddings.json", "r") as f:
2570
+ embedding_json = json.load(f)
2571
+
2572
+ # ============================== #
2573
+ # EMBED SPRITE IMAGES #
2574
+ # ============================== #
2575
+ # ensure model is initialized (fast no-op after first call)
2576
+ init_dinov2()
2577
+
2578
+ # embed the incoming sprite BytesIO images (same data structure you already use)
2579
+ sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8) # shape (N, D)
2580
+
2581
+ # load reference embeddings from JSON (they must be numeric lists)
2582
+ img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
2583
+
2584
+ # normalize both sides (important — stored embeddings may not be normalized)
2585
+ sprite_matrix = l2_normalize_rows(sprite_matrix)
2586
+ img_matrix = l2_normalize_rows(img_matrix)
2587
+
2588
+ # =========================================
2589
+ # Compute similarities & pick best match
2590
+ # =========================================
2591
+ similarity = np.matmul(sprite_matrix, img_matrix.T)
2592
+ most_similar_indices = np.argmax(similarity, axis=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2593
 
2594
  # =========================================
2595
  # Copy matched sprite assets + collect data
 
2598
  copied_folders = set()
2599
 
2600
  for sprite_idx, matched_idx in enumerate(most_similar_indices):
2601
+ matched_image_path = folder_image_paths[matched_idx]
 
 
 
 
 
 
 
 
 
 
2602
  matched_folder = os.path.dirname(matched_image_path)
2603
 
2604
  # CHANGED: use our new normalized sprite_base_path
 
2615
  logger.warning(f"No sprite.json in {matched_folder}")
2616
  continue
2617
 
2618
+ with open(sprite_json_path, 'r') as f:
2619
  sprite_info = json.load(f)
2620
  # copy all non‐matched files
2621
  for fname in os.listdir(matched_folder):
2622
  if fname in (os.path.basename(matched_image_path), 'sprite.json'):
2623
  continue
2624
+ shutil.copy2(os.path.join(matched_folder, fname),
2625
+ os.path.join(project_folder, fname))
 
 
 
2626
  project_data.append(sprite_info)
2627
 
2628
  # =========================================
 
2631
  backdrop_data = []
2632
  copied_backdrop_folders = set()
2633
  for backdrop_idx, matched_idx in enumerate(most_similar_indices):
2634
+ matched_image_path = folder_image_paths[matched_idx]
 
 
 
 
 
 
 
 
 
 
2635
  matched_folder = os.path.dirname(matched_image_path)
2636
  matched_filename = os.path.basename(matched_image_path)
2637
+
2638
  # CHANGED: use our new normalized backdrop_base_path
2639
  if not matched_folder.startswith(backdrop_base_path):
2640
  continue
 
2655
  logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
2656
  except Exception as e:
2657
  logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
2658
+
2659
  # copy non‐matched files
2660
  for fname in os.listdir(matched_folder):
2661
+ # if fname in (os.path.basename(matched_image_path), 'project.json'):
2662
  if fname in {matched_filename, 'project.json'}:
2663
  continue
2664
+ # shutil.copy2(os.path.join(matched_folder, fname),
2665
+ # os.path.join(project_folder, fname))
2666
  src = os.path.join(matched_folder, fname)
2667
  dst = os.path.join(project_folder, fname)
2668
  if os.path.isfile(src):
 
2675
  # append the stage‐target from its project.json
2676
  pj = os.path.join(matched_folder, 'project.json')
2677
  if os.path.exists(pj):
2678
+ with open(pj, 'r') as f:
2679
  bd_json = json.load(f)
2680
  for tgt in bd_json.get("targets", []):
2681
  if tgt.get("isStage"):
 
2683
  else:
2684
  logger.warning(f"No project.json in {matched_folder}")
2685
 
2686
+
2687
  # =========================================
2688
  # Merge into final Scratch project.json
2689
  # =========================================
 
2711
  if key not in seen_costumes:
2712
  seen_costumes.add(key)
2713
  all_costumes.append(costume)
2714
+
2715
  if i == 0:
2716
  sounds = bd.get("sounds", [])
2717
  stage_obj={
2718
  "isStage": True,
2719
  "name": "Stage",
2720
  "objName": "Stage",
2721
+ "variables": {},
2722
+ "lists": {},
2723
  "broadcasts": {},
2724
+ "blocks": {},
2725
  "comments": {},
2726
  "currentCostume": 1 if len(all_costumes) > 1 else 0,
2727
  "costumes": all_costumes,
2728
  "sounds": sounds,
2729
+ "volume": 100,
2730
  "layerOrder": 0,
2731
+ "tempo": 60,
2732
  "videoTransparency": 50,
2733
  "videoState": "on",
2734
  "textToSpeechLanguage": None
 
2749
  logger.info(f"✅ Default backdrop sound copied to project: {default_backdrop_sound_name}")
2750
  except Exception as e:
2751
  logger.error(f"❌ Failed to copy default backdrop: {e}")
2752
+
2753
  stage_obj={
2754
  "isStage": True,
2755
  "name": "Stage",
 
2790
  }
2791
  final_project["targets"].insert(0, stage_obj)
2792
 
2793
+ with open(project_json_path, 'w') as f:
2794
  json.dump(final_project, f, indent=2)
2795
 
2796
  return project_json_path