prthm11 commited on
Commit
ada005f
·
verified ·
1 Parent(s): 5727fc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -141
app.py CHANGED
@@ -703,146 +703,6 @@ def choose_top_candidates(embedding_results, phash_results, imgmatch_results, to
703
 
704
  return result
705
 
706
-
707
- def hybrid_similarity_matching(sprite_images_bytes, sprite_ids, min_similarity=None, top_k=5, method_weights=(0.5,0.3,0.2)):
708
- from PIL import Image
709
- # Local safe defaults
710
- embeddings_path = os.path.join(BLOCKS_DIR, "hybrid_embeddings.json")
711
- hash_path = os.path.join(BLOCKS_DIR, "phash_data.json")
712
- signature_path = os.path.join(BLOCKS_DIR, "signature_data.json")
713
-
714
- # Load embeddings
715
- embedding_json = {}
716
- if os.path.exists(embeddings_path):
717
- with open(embeddings_path, "r", encoding="utf-8") as f:
718
- embedding_json = json.load(f)
719
-
720
- # Load phash data (if exists) -> ensure hash_dict variable exists
721
- hash_dict = {}
722
- if os.path.exists(hash_path):
723
- try:
724
- with open(hash_path, "r", encoding="utf-8") as f:
725
- hash_data = json.load(f)
726
- for path, hash_str in hash_data.items():
727
- try:
728
- hash_dict[path] = hash_str
729
- except Exception:
730
- pass
731
- except Exception:
732
- pass
733
-
734
- # Load signature data (if exists) -> ensure signature_dict exists
735
- signature_dict = {}
736
- sig_data = {}
737
- if os.path.exists(signature_path):
738
- try:
739
- with open(signature_path, "r", encoding="utf-8") as f:
740
- sig_data = json.load(f)
741
- for path, sig_list in sig_data.items():
742
- try:
743
- signature_dict[path] = np.array(sig_list)
744
- except Exception:
745
- pass
746
- except Exception:
747
- pass
748
-
749
- # Parse embeddings into lists
750
- paths_list = []
751
- embeddings_list = []
752
- if isinstance(embedding_json, dict):
753
- for p, emb in embedding_json.items():
754
- if isinstance(emb, dict):
755
- maybe_emb = emb.get("embedding") or emb.get("embeddings") or emb.get("emb")
756
- if maybe_emb is None:
757
- continue
758
- arr = np.asarray(maybe_emb, dtype=np.float32)
759
- elif isinstance(emb, list):
760
- arr = np.asarray(emb, dtype=np.float32)
761
- else:
762
- continue
763
- paths_list.append(os.path.normpath(str(p)))
764
- embeddings_list.append(arr)
765
- elif isinstance(embedding_json, list):
766
- for item in embedding_json:
767
- if not isinstance(item, dict):
768
- continue
769
- p = item.get("path") or item.get("image_path") or item.get("file") or item.get("filename") or item.get("img_path")
770
- emb = item.get("embeddings") or item.get("embedding") or item.get("features") or item.get("vector") or item.get("emb")
771
- if p is None or emb is None:
772
- continue
773
- paths_list.append(os.path.normpath(str(p)))
774
- embeddings_list.append(np.asarray(emb, dtype=np.float32))
775
-
776
- if len(paths_list) == 0:
777
- print("⚠ No reference images/embeddings found (this test harness may be running without data)")
778
- # Return empty results gracefully
779
- return [[] for _ in sprite_images_bytes], [[] for _ in sprite_images_bytes], []
780
-
781
- ref_matrix = np.vstack(embeddings_list).astype(np.float32)
782
-
783
- # Batch: Get all sprite embeddings, phash, sigs first
784
- sprite_emb_list = []
785
- sprite_phash_list = []
786
- sprite_sig_list = []
787
- per_sprite_final_indices = []
788
- per_sprite_final_scores = []
789
- per_sprite_rerank_debug = []
790
- for i, sprite_bytes in enumerate(sprite_images_bytes):
791
- sprite_pil = Image.open(sprite_bytes)
792
- enhanced_sprite = process_image_cv2_from_pil(sprite_pil, scale=2) or sprite_pil
793
- # sprite_emb = get_dinov2_embedding_from_pil(preprocess_for_model(enhanced_sprite)) or np.zeros(ref_matrix.shape[1])
794
- # sprite_emb_list.append(sprite_emb)
795
- sprite_emb = get_dinov2_embedding_from_pil(preprocess_for_model(enhanced_sprite))
796
- sprite_emb = sprite_emb if sprite_emb is not None else np.zeros(ref_matrix.shape[1])
797
- sprite_emb_list.append(sprite_emb)
798
- # Perceptual hash
799
- sprite_hash_arr = preprocess_for_hash(enhanced_sprite)
800
- sprite_phash = None
801
- if sprite_hash_arr is not None:
802
- try: sprite_phash = phash.encode_image(image_array=sprite_hash_arr)
803
- except: pass
804
- sprite_phash_list.append(sprite_phash)
805
- # Signature
806
- sprite_sig = None
807
- embedding_results, phash_results, imgmatch_results, combined_results = run_query_search_flow(
808
- query_b64=sprite_b64_clean[i],
809
- processed_dir=BLOCKS_DIR,
810
- embeddings_dict=embedding_json,
811
- hash_dict=hash_data,
812
- signature_obj_map=sig_data,
813
- gis=gis,
814
- phash=phash,
815
- MAX_PHASH_BITS=64,
816
- k=5
817
- )
818
- # Call the advanced re-ranker
819
- rerank_result = choose_top_candidates(embedding_results, phash_results, imgmatch_results,
820
- top_k=top_k, method_weights=method_weights, verbose=True)
821
- per_sprite_rerank_debug.append(rerank_result)
822
-
823
- # Selection logic: prefer consensus, else weighted top-1
824
- final = None
825
- if len(rerank_result["consensus_topk"]) > 0:
826
- consensus = rerank_result["consensus_topk"]
827
- best = max(consensus, key=lambda p: rerank_result["weighted_scores_full"].get(p, 0.0))
828
- final = best
829
- else:
830
- final = rerank_result["weighted_topk"][0][0] if rerank_result["weighted_topk"] else None
831
-
832
- # Store index and score for downstream use
833
- if final is not None and final in paths_list:
834
- idx = paths_list.index(final)
835
- score = rerank_result["weighted_scores_full"].get(final, 0.0)
836
- per_sprite_final_indices.append([idx])
837
- per_sprite_final_scores.append([score])
838
- print(f"Sprite '{sprite_ids}' FINAL selected: {final} (index {idx}) score={score:.4f}")
839
- else:
840
- per_sprite_final_indices.append([])
841
- per_sprite_final_scores.append([])
842
-
843
- return per_sprite_final_indices, per_sprite_final_scores, paths_list#, per_sprite_rerank_debug
844
-
845
-
846
  def is_subpath(path: str, base: str) -> bool:
847
  """Return True if path is inside base (works across OSes)."""
848
  try:
@@ -1847,7 +1707,6 @@ def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1,
1847
  code_blocks_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\code_blocks"
1848
  # out_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\out_json"
1849
 
1850
-
1851
  project_json_path = os.path.join(project_folder, "project.json")
1852
 
1853
  # -------------------------
@@ -1871,6 +1730,145 @@ def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1,
1871
  img.save(buffer, format="PNG")
1872
  buffer.seek(0)
1873
  sprite_images_bytes.append(buffer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1874
 
1875
  # Use hybrid matching system
1876
  per_sprite_matched_indices, per_sprite_scores, paths_list = hybrid_similarity_matching(
 
703
 
704
  return result
705
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  def is_subpath(path: str, base: str) -> bool:
707
  """Return True if path is inside base (works across OSes)."""
708
  try:
 
1707
  code_blocks_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\code_blocks"
1708
  # out_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\out_json"
1709
 
 
1710
  project_json_path = os.path.join(project_folder, "project.json")
1711
 
1712
  # -------------------------
 
1730
  img.save(buffer, format="PNG")
1731
  buffer.seek(0)
1732
  sprite_images_bytes.append(buffer)
1733
+
1734
+ def hybrid_similarity_matching(sprite_images_bytes, sprite_ids, min_similarity=None, top_k=5, method_weights=(0.5,0.3,0.2)):
1735
+ from PIL import Image
1736
+ # Local safe defaults
1737
+ embeddings_path = os.path.join(BLOCKS_DIR, "hybrid_embeddings.json")
1738
+ hash_path = os.path.join(BLOCKS_DIR, "phash_data.json")
1739
+ signature_path = os.path.join(BLOCKS_DIR, "signature_data.json")
1740
+
1741
+ # Load embeddings
1742
+ embedding_json = {}
1743
+ if os.path.exists(embeddings_path):
1744
+ with open(embeddings_path, "r", encoding="utf-8") as f:
1745
+ embedding_json = json.load(f)
1746
+
1747
+ # Load phash data (if exists) -> ensure hash_dict variable exists
1748
+ hash_dict = {}
1749
+ if os.path.exists(hash_path):
1750
+ try:
1751
+ with open(hash_path, "r", encoding="utf-8") as f:
1752
+ hash_data = json.load(f)
1753
+ for path, hash_str in hash_data.items():
1754
+ try:
1755
+ hash_dict[path] = hash_str
1756
+ except Exception:
1757
+ pass
1758
+ except Exception:
1759
+ pass
1760
+
1761
+ # Load signature data (if exists) -> ensure signature_dict exists
1762
+ signature_dict = {}
1763
+ sig_data = {}
1764
+ if os.path.exists(signature_path):
1765
+ try:
1766
+ with open(signature_path, "r", encoding="utf-8") as f:
1767
+ sig_data = json.load(f)
1768
+ for path, sig_list in sig_data.items():
1769
+ try:
1770
+ signature_dict[path] = np.array(sig_list)
1771
+ except Exception:
1772
+ pass
1773
+ except Exception:
1774
+ pass
1775
+
1776
+ # Parse embeddings into lists
1777
+ paths_list = []
1778
+ embeddings_list = []
1779
+ if isinstance(embedding_json, dict):
1780
+ for p, emb in embedding_json.items():
1781
+ if isinstance(emb, dict):
1782
+ maybe_emb = emb.get("embedding") or emb.get("embeddings") or emb.get("emb")
1783
+ if maybe_emb is None:
1784
+ continue
1785
+ arr = np.asarray(maybe_emb, dtype=np.float32)
1786
+ elif isinstance(emb, list):
1787
+ arr = np.asarray(emb, dtype=np.float32)
1788
+ else:
1789
+ continue
1790
+ paths_list.append(os.path.normpath(str(p)))
1791
+ embeddings_list.append(arr)
1792
+ elif isinstance(embedding_json, list):
1793
+ for item in embedding_json:
1794
+ if not isinstance(item, dict):
1795
+ continue
1796
+ p = item.get("path") or item.get("image_path") or item.get("file") or item.get("filename") or item.get("img_path")
1797
+ emb = item.get("embeddings") or item.get("embedding") or item.get("features") or item.get("vector") or item.get("emb")
1798
+ if p is None or emb is None:
1799
+ continue
1800
+ paths_list.append(os.path.normpath(str(p)))
1801
+ embeddings_list.append(np.asarray(emb, dtype=np.float32))
1802
+
1803
+ if len(paths_list) == 0:
1804
+ print("⚠ No reference images/embeddings found (this test harness may be running without data)")
1805
+ # Return empty results gracefully
1806
+ return [[] for _ in sprite_images_bytes], [[] for _ in sprite_images_bytes], []
1807
+
1808
+ ref_matrix = np.vstack(embeddings_list).astype(np.float32)
1809
+
1810
+ # Batch: Get all sprite embeddings, phash, sigs first
1811
+ sprite_emb_list = []
1812
+ sprite_phash_list = []
1813
+ sprite_sig_list = []
1814
+ per_sprite_final_indices = []
1815
+ per_sprite_final_scores = []
1816
+ per_sprite_rerank_debug = []
1817
+ for i, sprite_bytes in enumerate(sprite_images_bytes):
1818
+ sprite_pil = Image.open(sprite_bytes)
1819
+ enhanced_sprite = process_image_cv2_from_pil(sprite_pil, scale=2) or sprite_pil
1820
+ # sprite_emb = get_dinov2_embedding_from_pil(preprocess_for_model(enhanced_sprite)) or np.zeros(ref_matrix.shape[1])
1821
+ # sprite_emb_list.append(sprite_emb)
1822
+ sprite_emb = get_dinov2_embedding_from_pil(preprocess_for_model(enhanced_sprite))
1823
+ sprite_emb = sprite_emb if sprite_emb is not None else np.zeros(ref_matrix.shape[1])
1824
+ sprite_emb_list.append(sprite_emb)
1825
+ # Perceptual hash
1826
+ sprite_hash_arr = preprocess_for_hash(enhanced_sprite)
1827
+ sprite_phash = None
1828
+ if sprite_hash_arr is not None:
1829
+ try: sprite_phash = phash.encode_image(image_array=sprite_hash_arr)
1830
+ except: pass
1831
+ sprite_phash_list.append(sprite_phash)
1832
+ # Signature
1833
+ sprite_sig = None
1834
+ embedding_results, phash_results, imgmatch_results, combined_results = run_query_search_flow(
1835
+ query_b64=sprite_b64_clean[i],
1836
+ processed_dir=BLOCKS_DIR,
1837
+ embeddings_dict=embedding_json,
1838
+ hash_dict=hash_data,
1839
+ signature_obj_map=sig_data,
1840
+ gis=gis,
1841
+ phash=phash,
1842
+ MAX_PHASH_BITS=64,
1843
+ k=5
1844
+ )
1845
+ # Call the advanced re-ranker
1846
+ rerank_result = choose_top_candidates(embedding_results, phash_results, imgmatch_results,
1847
+ top_k=top_k, method_weights=method_weights, verbose=True)
1848
+ per_sprite_rerank_debug.append(rerank_result)
1849
+
1850
+ # Selection logic: prefer consensus, else weighted top-1
1851
+ final = None
1852
+ if len(rerank_result["consensus_topk"]) > 0:
1853
+ consensus = rerank_result["consensus_topk"]
1854
+ best = max(consensus, key=lambda p: rerank_result["weighted_scores_full"].get(p, 0.0))
1855
+ final = best
1856
+ else:
1857
+ final = rerank_result["weighted_topk"][0][0] if rerank_result["weighted_topk"] else None
1858
+
1859
+ # Store index and score for downstream use
1860
+ if final is not None and final in paths_list:
1861
+ idx = paths_list.index(final)
1862
+ score = rerank_result["weighted_scores_full"].get(final, 0.0)
1863
+ per_sprite_final_indices.append([idx])
1864
+ per_sprite_final_scores.append([score])
1865
+ print(f"Sprite '{sprite_ids}' FINAL selected: {final} (index {idx}) score={score:.4f}")
1866
+ else:
1867
+ per_sprite_final_indices.append([])
1868
+ per_sprite_final_scores.append([])
1869
+
1870
+ return per_sprite_final_indices, per_sprite_final_scores, paths_list#, per_sprite_rerank_debug
1871
+
1872
 
1873
  # Use hybrid matching system
1874
  per_sprite_matched_indices, per_sprite_scores, paths_list = hybrid_similarity_matching(