Update app.py
Browse files
app.py
CHANGED
|
@@ -2391,84 +2391,156 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 2391 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
| 2392 |
|
| 2393 |
# def similarity_matching(input_json_path: str, project_folder: str) -> str:
|
| 2394 |
-
def similarity_matching(sprites_data:
|
| 2395 |
-
""
|
| 2396 |
-
Finds the best matching assets for input sprites using a FAISS index and builds a Scratch project.
|
| 2397 |
-
|
| 2398 |
-
Args:
|
| 2399 |
-
sprites_data (str): JSON string containing sprite metadata and base64 image data.
|
| 2400 |
-
project_folder (str): The directory where the final project.json and assets will be saved.
|
| 2401 |
-
|
| 2402 |
-
Returns:
|
| 2403 |
-
str: The path to the generated project.json file.
|
| 2404 |
-
"""
|
| 2405 |
-
logger.info("🔍 Running similarity matching with FAISS...")
|
| 2406 |
os.makedirs(project_folder, exist_ok=True)
|
| 2407 |
-
|
| 2408 |
-
project_json_path = os.path.join(project_folder, 'project.json')
|
| 2409 |
-
# Lazily load the model and FAISS index on the first call
|
| 2410 |
-
load_model_and_index()
|
| 2411 |
-
|
| 2412 |
-
# ==================================================== #
|
| 2413 |
-
# 1. PREPARE INPUT SPRITE IMAGES FROM BASE64 DATA #
|
| 2414 |
-
# ==================================================== #
|
| 2415 |
-
# The input is a JSON string, so we first need to parse it.
|
| 2416 |
-
#sprites_data_dict = json.loads(sprites_data)
|
| 2417 |
-
|
| 2418 |
-
# ...
|
| 2419 |
-
# The 'sprites_data' variable is already a dictionary.
|
| 2420 |
-
|
| 2421 |
-
pil_images = []
|
| 2422 |
-
# Use sprites_data directly
|
| 2423 |
-
for sid, sprite in sprites_data.items():
|
| 2424 |
-
#for sid, sprite in sprites_data_dict.items():
|
| 2425 |
-
base64_string = sprite["base64"]
|
| 2426 |
-
# Decode the base64 string to bytes, then open as a PIL Image
|
| 2427 |
-
image_data = base64.b64decode(base64_string.split(',')[-1])
|
| 2428 |
-
image = Image.open(BytesIO(image_data)).convert("RGB")
|
| 2429 |
-
pil_images.append(image)
|
| 2430 |
-
|
| 2431 |
-
if not pil_images:
|
| 2432 |
-
logger.warning("No images found in the input sprites_data. Aborting.")
|
| 2433 |
-
# Handle case with no images if necessary
|
| 2434 |
-
return ""
|
| 2435 |
|
| 2436 |
-
#
|
| 2437 |
-
#
|
| 2438 |
-
# =========================================== #
|
| 2439 |
-
logger.info(f"Encoding {len(pil_images)} input sprite(s)...")
|
| 2440 |
-
|
| 2441 |
-
# The model can encode a list of PIL images directly in a batch
|
| 2442 |
-
query_embeddings = MODEL.encode(pil_images, convert_to_tensor=True, show_progress_bar=False)
|
| 2443 |
-
|
| 2444 |
-
# Ensure the embeddings are float32 numpy arrays for FAISS
|
| 2445 |
-
query_embeddings_np = query_embeddings.cpu().numpy().astype(np.float32)
|
| 2446 |
-
|
| 2447 |
-
# We only need the single best match for each sprite, so k=1
|
| 2448 |
-
k = 1
|
| 2449 |
-
logger.info(f"Searching for the top {k} match for each sprite in FAISS index...")
|
| 2450 |
-
distances, indices = FAISS_INDEX.search(query_embeddings_np, k)
|
| 2451 |
-
|
| 2452 |
-
# `indices` is a 2D array, e.g., [[23], [105], [42]]. We want a flat list of these indices.
|
| 2453 |
-
most_similar_indices = indices.flatten()
|
| 2454 |
-
|
| 2455 |
-
# ========================================================= #
|
| 2456 |
-
# 3. COPY MATCHED ASSETS & BUILD THE FINAL project.json #
|
| 2457 |
-
# (This part remains the same as your original logic) #
|
| 2458 |
-
# ========================================================= #
|
| 2459 |
-
logger.info("Processing matched assets and building project.json...")
|
| 2460 |
-
|
| 2461 |
backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 2462 |
-
sprite_base_path
|
| 2463 |
-
|
| 2464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2465 |
copied_folders = set()
|
| 2466 |
-
|
| 2467 |
for sprite_idx, matched_idx in enumerate(most_similar_indices):
|
| 2468 |
-
|
| 2469 |
-
|
| 2470 |
-
matched_folder = os.path.dirname(matched_image_path)
|
| 2471 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2472 |
if not matched_folder.startswith(sprite_base_path):
|
| 2473 |
continue
|
| 2474 |
|
|
@@ -2482,33 +2554,39 @@ def similarity_matching(sprites_data: dict, project_folder: str) -> str:
|
|
| 2482 |
logger.warning(f"No sprite.json in {matched_folder}")
|
| 2483 |
continue
|
| 2484 |
|
| 2485 |
-
with open(sprite_json_path, 'r') as f:
|
| 2486 |
sprite_info = json.load(f)
|
| 2487 |
-
|
| 2488 |
-
# Copy all non-matched files
|
| 2489 |
for fname in os.listdir(matched_folder):
|
| 2490 |
if fname in (os.path.basename(matched_image_path), 'sprite.json'):
|
| 2491 |
continue
|
| 2492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2493 |
project_data.append(sprite_info)
|
| 2494 |
|
| 2495 |
-
# (The rest of your backdrop handling and project.json finalization logic would go here,
|
| 2496 |
-
# unchanged from your original script. I am omitting it for brevity but you should include it.)
|
| 2497 |
-
# =========================================
|
| 2498 |
-
# Copy matched backdrop assets + collect
|
| 2499 |
-
# =========================================
|
| 2500 |
# =========================================
|
| 2501 |
# Copy matched backdrop assets + collect
|
| 2502 |
# =========================================
|
| 2503 |
backdrop_data = []
|
| 2504 |
copied_backdrop_folders = set()
|
| 2505 |
for backdrop_idx, matched_idx in enumerate(most_similar_indices):
|
| 2506 |
-
|
| 2507 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2508 |
matched_folder = os.path.dirname(matched_image_path)
|
| 2509 |
-
# ... rest of the loop
|
| 2510 |
matched_filename = os.path.basename(matched_image_path)
|
| 2511 |
-
|
| 2512 |
# CHANGED: use our new normalized backdrop_base_path
|
| 2513 |
if not matched_folder.startswith(backdrop_base_path):
|
| 2514 |
continue
|
|
@@ -2529,14 +2607,11 @@ def similarity_matching(sprites_data: dict, project_folder: str) -> str:
|
|
| 2529 |
logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
|
| 2530 |
except Exception as e:
|
| 2531 |
logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
|
| 2532 |
-
|
| 2533 |
# copy non‐matched files
|
| 2534 |
for fname in os.listdir(matched_folder):
|
| 2535 |
-
# if fname in (os.path.basename(matched_image_path), 'project.json'):
|
| 2536 |
if fname in {matched_filename, 'project.json'}:
|
| 2537 |
continue
|
| 2538 |
-
# shutil.copy2(os.path.join(matched_folder, fname),
|
| 2539 |
-
# os.path.join(project_folder, fname))
|
| 2540 |
src = os.path.join(matched_folder, fname)
|
| 2541 |
dst = os.path.join(project_folder, fname)
|
| 2542 |
if os.path.isfile(src):
|
|
@@ -2549,7 +2624,7 @@ def similarity_matching(sprites_data: dict, project_folder: str) -> str:
|
|
| 2549 |
# append the stage‐target from its project.json
|
| 2550 |
pj = os.path.join(matched_folder, 'project.json')
|
| 2551 |
if os.path.exists(pj):
|
| 2552 |
-
with open(pj, 'r') as f:
|
| 2553 |
bd_json = json.load(f)
|
| 2554 |
for tgt in bd_json.get("targets", []):
|
| 2555 |
if tgt.get("isStage"):
|
|
@@ -2557,7 +2632,6 @@ def similarity_matching(sprites_data: dict, project_folder: str) -> str:
|
|
| 2557 |
else:
|
| 2558 |
logger.warning(f"No project.json in {matched_folder}")
|
| 2559 |
|
| 2560 |
-
|
| 2561 |
# =========================================
|
| 2562 |
# Merge into final Scratch project.json
|
| 2563 |
# =========================================
|
|
@@ -2585,31 +2659,31 @@ def similarity_matching(sprites_data: dict, project_folder: str) -> str:
|
|
| 2585 |
if key not in seen_costumes:
|
| 2586 |
seen_costumes.add(key)
|
| 2587 |
all_costumes.append(costume)
|
| 2588 |
-
|
| 2589 |
if i == 0:
|
| 2590 |
sounds = bd.get("sounds", [])
|
| 2591 |
stage_obj={
|
| 2592 |
"isStage": True,
|
| 2593 |
"name": "Stage",
|
| 2594 |
"objName": "Stage",
|
| 2595 |
-
"variables": {},
|
| 2596 |
-
"lists": {},
|
| 2597 |
"broadcasts": {},
|
| 2598 |
-
"blocks": {},
|
| 2599 |
"comments": {},
|
| 2600 |
"currentCostume": 1 if len(all_costumes) > 1 else 0,
|
| 2601 |
"costumes": all_costumes,
|
| 2602 |
"sounds": sounds,
|
| 2603 |
-
"volume": 100,
|
| 2604 |
"layerOrder": 0,
|
| 2605 |
-
"tempo": 60,
|
| 2606 |
"videoTransparency": 50,
|
| 2607 |
"videoState": "on",
|
| 2608 |
"textToSpeechLanguage": None
|
| 2609 |
}
|
| 2610 |
final_project["targets"].insert(0, stage_obj)
|
| 2611 |
else:
|
| 2612 |
-
logger.warning(
|
| 2613 |
default_backdrop_path = BACKDROP_DIR / "cd21514d0531fdffb22204e0ec5ed84a.svg"
|
| 2614 |
default_backdrop_name = "cd21514d0531fdffb22204e0ec5ed84a.svg"
|
| 2615 |
|
|
@@ -2623,7 +2697,7 @@ def similarity_matching(sprites_data: dict, project_folder: str) -> str:
|
|
| 2623 |
logger.info(f"✅ Default backdrop sound copied to project: {default_backdrop_sound_name}")
|
| 2624 |
except Exception as e:
|
| 2625 |
logger.error(f"❌ Failed to copy default backdrop: {e}")
|
| 2626 |
-
|
| 2627 |
stage_obj={
|
| 2628 |
"isStage": True,
|
| 2629 |
"name": "Stage",
|
|
@@ -2664,10 +2738,11 @@ def similarity_matching(sprites_data: dict, project_folder: str) -> str:
|
|
| 2664 |
}
|
| 2665 |
final_project["targets"].insert(0, stage_obj)
|
| 2666 |
|
| 2667 |
-
with open(project_json_path, 'w') as f:
|
| 2668 |
json.dump(final_project, f, indent=2)
|
| 2669 |
|
| 2670 |
return project_json_path
|
|
|
|
| 2671 |
# def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
| 2672 |
# logger.info("🔍 Running similarity matching…")
|
| 2673 |
# os.makedirs(project_folder, exist_ok=True)
|
|
|
|
| 2391 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
| 2392 |
|
| 2393 |
# def similarity_matching(input_json_path: str, project_folder: str) -> str:
|
| 2394 |
+
def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
| 2395 |
+
logger.info("🔍 Running similarity matching…")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2396 |
os.makedirs(project_folder, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2397 |
|
| 2398 |
+
# ----------------------------------------
|
| 2399 |
+
# CHANGED: define normalized base-paths so startswith() checks work
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2400 |
backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 2401 |
+
sprite_base_path = os.path.normpath(str(SPRITE_DIR))
|
| 2402 |
+
code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
|
| 2403 |
+
# ----------------------------------------
|
| 2404 |
+
|
| 2405 |
+
project_json_path = os.path.join(project_folder, "project.json")
|
| 2406 |
+
|
| 2407 |
+
# ==============================
|
| 2408 |
+
# READ SPRITE METADATA
|
| 2409 |
+
# ==============================
|
| 2410 |
+
sprite_ids, sprite_base64 = [], []
|
| 2411 |
+
for sid, sprite in sprites_data.items():
|
| 2412 |
+
sprite_ids.append(sid)
|
| 2413 |
+
sprite_base64.append(sprite["base64"])
|
| 2414 |
+
|
| 2415 |
+
sprite_images_bytes = []
|
| 2416 |
+
for b64 in sprite_base64:
|
| 2417 |
+
img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
|
| 2418 |
+
buffer = BytesIO()
|
| 2419 |
+
img.save(buffer, format="PNG")
|
| 2420 |
+
buffer.seek(0)
|
| 2421 |
+
sprite_images_bytes.append(buffer)
|
| 2422 |
+
|
| 2423 |
+
# =========================================
|
| 2424 |
+
# Build the list of all candidate images
|
| 2425 |
+
# (REPLACED: now loaded from FAISS paths JSON)
|
| 2426 |
+
# =========================================
|
| 2427 |
+
# NOTE: previously you had a hardcoded `folder_image_paths` sample.
|
| 2428 |
+
# Image paths are loaded from BLOCKS_DIR/"image_paths 2.json", the companion file of the FAISS index "faiss_index 2.bin".
|
| 2429 |
+
try:
|
| 2430 |
+
index_path = os.path.join(str(BLOCKS_DIR), "faiss_index 2.bin")
|
| 2431 |
+
paths_json_path = os.path.join(str(BLOCKS_DIR), "image_paths 2.json")
|
| 2432 |
+
# load image paths (these should be full paths or relative paths you used when building the index)
|
| 2433 |
+
with open(paths_json_path, "r", encoding="utf-8") as f:
|
| 2434 |
+
image_paths = json.load(f)
|
| 2435 |
+
folder_image_paths = [os.path.normpath(str(p)) for p in image_paths]
|
| 2436 |
+
except Exception as e:
|
| 2437 |
+
# Fallback: if "image_paths 2.json" cannot be loaded, try to recover the image paths from embed.json instead
|
| 2438 |
+
logger.error(f"Failed to load FAISS paths JSON from {paths_json_path}: {e}")
|
| 2439 |
+
# try to load previous embedding_json and recover its paths (if present)
|
| 2440 |
+
try:
|
| 2441 |
+
with open(f"{BLOCKS_DIR}/embed.json", "r", encoding="utf-8") as f:
|
| 2442 |
+
embedding_json = json.load(f)
|
| 2443 |
+
# embedding_json entries expected to have a 'path' or similar field; try common keys
|
| 2444 |
+
folder_image_paths = []
|
| 2445 |
+
for idx_entry in embedding_json:
|
| 2446 |
+
if "path" in idx_entry:
|
| 2447 |
+
folder_image_paths.append(os.path.normpath(str(idx_entry["path"])))
|
| 2448 |
+
elif "filename" in idx_entry:
|
| 2449 |
+
folder_image_paths.append(os.path.normpath(str(idx_entry["filename"])))
|
| 2450 |
+
else:
|
| 2451 |
+
# If no path available, synthesize a placeholder to avoid crashing later
|
| 2452 |
+
folder_image_paths.append(os.path.normpath(str(BLOCKS_DIR)))
|
| 2453 |
+
logger.warning("Using embed.json fallback to populate folder_image_paths.")
|
| 2454 |
+
except Exception as e2:
|
| 2455 |
+
logger.error(f"Failed to load embed.json fallback: {e2}")
|
| 2456 |
+
folder_image_paths = []
|
| 2457 |
+
|
| 2458 |
+
# -----------------------------------------
|
| 2459 |
+
# Load FAISS index and SentenceTransformer model
|
| 2460 |
+
# and perform search for each sprite image
|
| 2461 |
+
# -----------------------------------------
|
| 2462 |
+
most_similar_indices = None
|
| 2463 |
+
try:
|
| 2464 |
+
import faiss # local import in case not available globally
|
| 2465 |
+
from sentence_transformers import SentenceTransformer
|
| 2466 |
+
# load model (this may take time but ensures correct embedder)
|
| 2467 |
+
logger.info("Loading SentenceTransformer 'clip-ViT-L-14' model for embeddings...")
|
| 2468 |
+
st_model = SentenceTransformer('clip-ViT-L-14')
|
| 2469 |
+
|
| 2470 |
+
logger.info(f"Loading FAISS index from {index_path} ...")
|
| 2471 |
+
index = faiss.read_index(index_path)
|
| 2472 |
+
|
| 2473 |
+
# Convert BytesIO images to PIL images for batch encoding
|
| 2474 |
+
pil_images = []
|
| 2475 |
+
for buf in sprite_images_bytes:
|
| 2476 |
+
buf.seek(0)
|
| 2477 |
+
pil = Image.open(buf).convert("RGB")
|
| 2478 |
+
pil_images.append(pil)
|
| 2479 |
+
|
| 2480 |
+
if len(pil_images) == 0:
|
| 2481 |
+
raise ValueError("No sprite images available to encode.")
|
| 2482 |
+
|
| 2483 |
+
logger.info(f"Encoding {len(pil_images)} sprite images with SentenceTransformer...")
|
| 2484 |
+
# encode returns numpy array (n_samples, dim)
|
| 2485 |
+
sprite_embeddings = st_model.encode(pil_images, batch_size=8, convert_to_numpy=True)
|
| 2486 |
+
# ensure float32 and contiguous
|
| 2487 |
+
sprite_embeddings = np.ascontiguousarray(sprite_embeddings.astype(np.float32))
|
| 2488 |
+
|
| 2489 |
+
# number of neighbors to request (we only need the top-1 to emulate previous behavior)
|
| 2490 |
+
k = 1
|
| 2491 |
+
logger.info(f"Performing FAISS search (k={k}) for all sprites...")
|
| 2492 |
+
distances, indices = index.search(sprite_embeddings, k) # distances.shape = (N, k); indices.shape = (N, k)
|
| 2493 |
+
# take top-1 index for each sprite
|
| 2494 |
+
if indices is not None and indices.shape[0] > 0:
|
| 2495 |
+
most_similar_indices = indices[:, 0].tolist()
|
| 2496 |
+
else:
|
| 2497 |
+
most_similar_indices = []
|
| 2498 |
+
except Exception as e:
|
| 2499 |
+
logger.error(f"FAISS / SentenceTransformer matching failed: {e}. Falling back to simple in-memory similarity if possible.")
|
| 2500 |
+
# Fallback: reuse embedding_json if it was already loaded above (otherwise load embed.json now) and run an in-memory cosine-similarity search
|
| 2501 |
+
try:
|
| 2502 |
+
if 'embedding_json' not in locals():
|
| 2503 |
+
with open(f"{BLOCKS_DIR}/embed.json", "r", encoding="utf-8") as f:
|
| 2504 |
+
embedding_json = json.load(f)
|
| 2505 |
+
# build img_matrix from embedding_json entries (assumes "embeddings" key)
|
| 2506 |
+
img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
|
| 2507 |
+
# compute embeddings for sprites using any available embed function (init_dinov2/embed_bytesio_list previously used)
|
| 2508 |
+
init_dinov2()
|
| 2509 |
+
sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8)
|
| 2510 |
+
sprite_matrix = l2_normalize_rows(sprite_matrix)
|
| 2511 |
+
img_matrix = l2_normalize_rows(img_matrix)
|
| 2512 |
+
similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 2513 |
+
most_similar_indices = np.argmax(similarity, axis=1).tolist()
|
| 2514 |
+
logger.info("Fallback in-memory similarity search succeeded.")
|
| 2515 |
+
except Exception as e2:
|
| 2516 |
+
logger.error(f"Fallback similarity also failed: {e2}")
|
| 2517 |
+
most_similar_indices = []
|
| 2518 |
+
|
| 2519 |
+
# Ensure most_similar_indices is a list (possibly empty) rather than None; its length is not checked against the number of sprites
|
| 2520 |
+
if most_similar_indices is None:
|
| 2521 |
+
most_similar_indices = []
|
| 2522 |
+
|
| 2523 |
+
# =========================================
|
| 2524 |
+
# Copy matched sprite assets + collect data
|
| 2525 |
+
# =========================================
|
| 2526 |
+
project_data = []
|
| 2527 |
copied_folders = set()
|
| 2528 |
+
|
| 2529 |
for sprite_idx, matched_idx in enumerate(most_similar_indices):
|
| 2530 |
+
if matched_idx is None:
|
| 2531 |
+
continue
|
|
|
|
| 2532 |
|
| 2533 |
+
# protect against out-of-range indices
|
| 2534 |
+
try:
|
| 2535 |
+
matched_image_path = folder_image_paths[int(matched_idx)]
|
| 2536 |
+
except Exception as e:
|
| 2537 |
+
logger.warning(f"Matched index {matched_idx} invalid: {e}")
|
| 2538 |
+
continue
|
| 2539 |
+
|
| 2540 |
+
matched_image_path = os.path.normpath(str(matched_image_path))
|
| 2541 |
+
matched_folder = os.path.dirname(matched_image_path)
|
| 2542 |
+
|
| 2543 |
+
# CHANGED: use our new normalized sprite_base_path
|
| 2544 |
if not matched_folder.startswith(sprite_base_path):
|
| 2545 |
continue
|
| 2546 |
|
|
|
|
| 2554 |
logger.warning(f"No sprite.json in {matched_folder}")
|
| 2555 |
continue
|
| 2556 |
|
| 2557 |
+
with open(sprite_json_path, 'r', encoding='utf-8') as f:
|
| 2558 |
sprite_info = json.load(f)
|
| 2559 |
+
# copy all non‐matched files
|
|
|
|
| 2560 |
for fname in os.listdir(matched_folder):
|
| 2561 |
if fname in (os.path.basename(matched_image_path), 'sprite.json'):
|
| 2562 |
continue
|
| 2563 |
+
try:
|
| 2564 |
+
shutil.copy2(os.path.join(matched_folder, fname),
|
| 2565 |
+
os.path.join(project_folder, fname))
|
| 2566 |
+
except Exception as e:
|
| 2567 |
+
logger.error(f"Failed to copy sprite asset {fname} from {matched_folder}: {e}")
|
| 2568 |
project_data.append(sprite_info)
|
| 2569 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2570 |
# =========================================
|
| 2571 |
# Copy matched backdrop assets + collect
|
| 2572 |
# =========================================
|
| 2573 |
backdrop_data = []
|
| 2574 |
copied_backdrop_folders = set()
|
| 2575 |
for backdrop_idx, matched_idx in enumerate(most_similar_indices):
|
| 2576 |
+
if matched_idx is None:
|
| 2577 |
+
continue
|
| 2578 |
+
|
| 2579 |
+
# protect against out-of-range indices
|
| 2580 |
+
try:
|
| 2581 |
+
matched_image_path = folder_image_paths[int(matched_idx)]
|
| 2582 |
+
except Exception as e:
|
| 2583 |
+
logger.warning(f"Matched index {matched_idx} invalid for backdrop loop: {e}")
|
| 2584 |
+
continue
|
| 2585 |
+
|
| 2586 |
+
matched_image_path = os.path.normpath(str(matched_image_path))
|
| 2587 |
matched_folder = os.path.dirname(matched_image_path)
|
|
|
|
| 2588 |
matched_filename = os.path.basename(matched_image_path)
|
| 2589 |
+
|
| 2590 |
# CHANGED: use our new normalized backdrop_base_path
|
| 2591 |
if not matched_folder.startswith(backdrop_base_path):
|
| 2592 |
continue
|
|
|
|
| 2607 |
logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
|
| 2608 |
except Exception as e:
|
| 2609 |
logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
|
| 2610 |
+
|
| 2611 |
# copy non‐matched files
|
| 2612 |
for fname in os.listdir(matched_folder):
|
|
|
|
| 2613 |
if fname in {matched_filename, 'project.json'}:
|
| 2614 |
continue
|
|
|
|
|
|
|
| 2615 |
src = os.path.join(matched_folder, fname)
|
| 2616 |
dst = os.path.join(project_folder, fname)
|
| 2617 |
if os.path.isfile(src):
|
|
|
|
| 2624 |
# append the stage‐target from its project.json
|
| 2625 |
pj = os.path.join(matched_folder, 'project.json')
|
| 2626 |
if os.path.exists(pj):
|
| 2627 |
+
with open(pj, 'r', encoding='utf-8') as f:
|
| 2628 |
bd_json = json.load(f)
|
| 2629 |
for tgt in bd_json.get("targets", []):
|
| 2630 |
if tgt.get("isStage"):
|
|
|
|
| 2632 |
else:
|
| 2633 |
logger.warning(f"No project.json in {matched_folder}")
|
| 2634 |
|
|
|
|
| 2635 |
# =========================================
|
| 2636 |
# Merge into final Scratch project.json
|
| 2637 |
# =========================================
|
|
|
|
| 2659 |
if key not in seen_costumes:
|
| 2660 |
seen_costumes.add(key)
|
| 2661 |
all_costumes.append(costume)
|
| 2662 |
+
|
| 2663 |
if i == 0:
|
| 2664 |
sounds = bd.get("sounds", [])
|
| 2665 |
stage_obj={
|
| 2666 |
"isStage": True,
|
| 2667 |
"name": "Stage",
|
| 2668 |
"objName": "Stage",
|
| 2669 |
+
"variables": {},
|
| 2670 |
+
"lists": {},
|
| 2671 |
"broadcasts": {},
|
| 2672 |
+
"blocks": {},
|
| 2673 |
"comments": {},
|
| 2674 |
"currentCostume": 1 if len(all_costumes) > 1 else 0,
|
| 2675 |
"costumes": all_costumes,
|
| 2676 |
"sounds": sounds,
|
| 2677 |
+
"volume": 100,
|
| 2678 |
"layerOrder": 0,
|
| 2679 |
+
"tempo": 60,
|
| 2680 |
"videoTransparency": 50,
|
| 2681 |
"videoState": "on",
|
| 2682 |
"textToSpeechLanguage": None
|
| 2683 |
}
|
| 2684 |
final_project["targets"].insert(0, stage_obj)
|
| 2685 |
else:
|
| 2686 |
+
logger.warning("⚠️ No backdrop matched. Using default static backdrop.")
|
| 2687 |
default_backdrop_path = BACKDROP_DIR / "cd21514d0531fdffb22204e0ec5ed84a.svg"
|
| 2688 |
default_backdrop_name = "cd21514d0531fdffb22204e0ec5ed84a.svg"
|
| 2689 |
|
|
|
|
| 2697 |
logger.info(f"✅ Default backdrop sound copied to project: {default_backdrop_sound_name}")
|
| 2698 |
except Exception as e:
|
| 2699 |
logger.error(f"❌ Failed to copy default backdrop: {e}")
|
| 2700 |
+
|
| 2701 |
stage_obj={
|
| 2702 |
"isStage": True,
|
| 2703 |
"name": "Stage",
|
|
|
|
| 2738 |
}
|
| 2739 |
final_project["targets"].insert(0, stage_obj)
|
| 2740 |
|
| 2741 |
+
with open(project_json_path, 'w', encoding='utf-8') as f:
|
| 2742 |
json.dump(final_project, f, indent=2)
|
| 2743 |
|
| 2744 |
return project_json_path
|
| 2745 |
+
|
| 2746 |
# def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
| 2747 |
# logger.info("🔍 Running similarity matching…")
|
| 2748 |
# os.makedirs(project_folder, exist_ok=True)
|