prthm11 committed on
Commit
4dddcbe
·
verified ·
1 Parent(s): 4fa8083

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -92
app.py CHANGED
@@ -30,43 +30,43 @@ from difflib import get_close_matches
30
  import torch
31
  from transformers import AutoImageProcessor, AutoModel
32
 
33
- # --- Config (tune threads as needed) ---
34
- DINOV2_MODEL = "facebook/dinov2-small" # small = best CPU latency/quality tradeoff
35
- DEVICE = torch.device("cpu")
36
- torch.set_num_threads(4) # tune for your CPU
37
-
38
- # --- Globals for single-shot model load ---
39
- _dinov2_processor = None
40
- _dinov2_model = None
41
-
42
- os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
43
- class ChatOpenRouter(ChatOpenAI):
44
- openai_api_key: Optional[SecretStr] = Field(
45
- alias="api_key",
46
- default_factory=secret_from_env("OPENROUTER_API_KEY", default=None),
47
- )
48
- @property
49
- def lc_secrets(self) -> dict[str, str]:
50
- return {"openai_api_key": "OPENROUTER_API_KEY"}
51
 
52
- def __init__(self,
53
- openai_api_key: Optional[str] = None,
54
- **kwargs):
55
- openai_api_key = (
56
- openai_api_key or os.environ.get("OPENROUTER_API_KEY")
57
- )
58
- super().__init__(
59
- base_url="https://openrouter.ai/api/v1",
60
- openai_api_key=openai_api_key,
61
- **kwargs
62
- )
63
 
64
- llm2 = ChatOpenRouter(
65
- #model_name="deepseek/deepseek-r1-0528:free",
66
- #model_name="google/gemini-2.0-flash-exp:free",
67
- #model_name="deepseek/deepseek-v3-base:free",
68
- model_name="deepseek/deepseek-r1:free"
69
- )
70
 
71
 
72
  def log_execution_time(func):
@@ -79,7 +79,7 @@ def log_execution_time(func):
79
  return result
80
  return wrapper
81
 
82
- global pdf_doc
83
  # ============================== #
84
  # INITIALIZE CLIP EMBEDDER #
85
  # ============================== #
@@ -319,43 +319,43 @@ agent_json_resolver = create_react_agent(
319
  prompt=SYSTEM_PROMPT_JSON_CORRECTOR
320
  )
321
 
322
- # adding the new embedding models:
323
- def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
324
- """Lazy-initialize DINOv2 processor & model (call once before embedding)."""
325
- global _dinov2_processor, _dinov2_model
326
- if _dinov2_processor is None or _dinov2_model is None:
327
- # _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
328
- _dinov2_processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)
329
- _dinov2_model = AutoModel.from_pretrained(model_name)
330
- _dinov2_model.eval().to(device)
331
-
332
- def embed_bytesio_list(bytesio_list, batch_size: int = 8):
333
- """
334
- Accepts a list of BytesIO objects (each contains an image, like your sprite_images_bytes).
335
- Returns: np.ndarray shape (N, D) of L2-normalized embeddings (dtype float32).
336
- """
337
- if _dinov2_processor is None or _dinov2_model is None:
338
- init_dinov2()
339
-
340
- imgs = [Image.open(b).convert("RGB") for b in bytesio_list]
341
- embs = []
342
- for i in range(0, len(imgs), batch_size):
343
- batch = imgs[i : i + batch_size]
344
- inputs = _dinov2_processor(images=batch, return_tensors="pt")
345
- inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
346
- with torch.no_grad():
347
- out = _dinov2_model(**inputs)
348
- # global image embedding from CLS token
349
- cls = out.last_hidden_state[:, 0, :] # (B, D)
350
- cls = torch.nn.functional.normalize(cls, p=2, dim=1) # L2 normalize rows
351
- embs.append(cls.cpu().numpy())
352
- if not embs:
353
- return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)
354
- return np.vstack(embs).astype(np.float32)
355
-
356
- def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
357
- norm = np.linalg.norm(a, axis=1, keepdims=True)
358
- return a / (norm + eps)
359
 
360
  # Helper function to load the block catalog from a JSON file
361
  def _load_block_catalog(block_type: str) -> Dict:
@@ -921,7 +921,6 @@ def clean_base64_for_model(raw_b64, max_bytes_threshold=4000000) -> str:
921
  # otherwise return original with its mime prefix (ensure prefix exists)
922
  return f"data:{mime};base64,{clean_b64}"
923
 
924
-
925
  SCRATCH_OPCODES = [
926
  'motion_movesteps', 'motion_turnright', 'motion_turnleft', 'motion_goto',
927
  'motion_gotoxy', 'motion_glideto', 'motion_glidesecstoxy', 'motion_pointindirection',
@@ -3382,43 +3381,107 @@ SPRITE_DIR / "Abby.sprite3" / "34a175600dc009a521eb46fdbbbeeb67.png"
3382
  CODE_BLOCKS_DIR / "script5.jpg",
3383
  CODE_BLOCKS_DIR / "script6.jpg"]
3384
  folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
3385
- # =========================================
3386
 
3387
- # -----------------------------------------
3388
- # Load reference embeddings from JSON
3389
- # -----------------------------------------
3390
- with open(f"{BLOCKS_DIR}/embed.json", "r") as f:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3391
  embedding_json = json.load(f)
3392
 
 
 
 
 
 
 
 
 
 
 
 
3393
  # =========================================
3394
  # Decode & embed each sprite image
3395
  # =========================================
 
 
 
3396
  # sprite_features = []
3397
  # for b64 in sprite_base64:
3398
- # if "," in b64:
3399
  # b64 = b64.split(",", 1)[1]
3400
-
3401
  # img_bytes = base64.b64decode(b64)
3402
  # pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
 
 
3403
  # buf = BytesIO()
3404
  # pil_img.save(buf, format="PNG")
3405
  # buf.seek(0)
3406
- # feats = clip_embd.embed_image([buf])[0]
 
3407
  # sprite_features.append(feats)
 
 
 
 
 
 
 
3408
 
3409
- # ============================== #
3410
- # EMBED SPRITE IMAGES #
3411
- # ============================== #
3412
- # ensure model is initialized (fast no-op after first call)
3413
- init_dinov2()
3414
-
3415
- # embed the incoming sprite BytesIO images (same data structure you already use)
3416
- sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8) # shape (N, D)
3417
 
3418
- # load reference embeddings from JSON (they must be numeric lists)
3419
- img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
3420
 
3421
  # normalize both sides (important — stored embeddings may not be normalized)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3422
  sprite_matrix = l2_normalize_rows(sprite_matrix)
3423
  img_matrix = l2_normalize_rows(img_matrix)
3424
 
 
30
  import torch
31
  from transformers import AutoImageProcessor, AutoModel
32
 
33
+ # # --- Config (tune threads as needed) ---
34
+ # DINOV2_MODEL = "facebook/dinov2-small" # small = best CPU latency/quality tradeoff
35
+ # DEVICE = torch.device("cpu")
36
+ # torch.set_num_threads(4) # tune for your CPU
37
+
38
+ # # --- Globals for single-shot model load ---
39
+ # _dinov2_processor = None
40
+ # _dinov2_model = None
41
+
42
+ # os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
43
+ # class ChatOpenRouter(ChatOpenAI):
44
+ # openai_api_key: Optional[SecretStr] = Field(
45
+ # alias="api_key",
46
+ # default_factory=secret_from_env("OPENROUTER_API_KEY", default=None),
47
+ # )
48
+ # @property
49
+ # def lc_secrets(self) -> dict[str, str]:
50
+ # return {"openai_api_key": "OPENROUTER_API_KEY"}
51
 
52
+ # def __init__(self,
53
+ # openai_api_key: Optional[str] = None,
54
+ # **kwargs):
55
+ # openai_api_key = (
56
+ # openai_api_key or os.environ.get("OPENROUTER_API_KEY")
57
+ # )
58
+ # super().__init__(
59
+ # base_url="https://openrouter.ai/api/v1",
60
+ # openai_api_key=openai_api_key,
61
+ # **kwargs
62
+ # )
63
 
64
+ # llm2 = ChatOpenRouter(
65
+ # #model_name="deepseek/deepseek-r1-0528:free",
66
+ # #model_name="google/gemini-2.0-flash-exp:free",
67
+ # #model_name="deepseek/deepseek-v3-base:free",
68
+ # model_name="deepseek/deepseek-r1:free"
69
+ # )
70
 
71
 
72
  def log_execution_time(func):
 
79
  return result
80
  return wrapper
81
 
82
+ # global pdf_doc
83
  # ============================== #
84
  # INITIALIZE CLIP EMBEDDER #
85
  # ============================== #
 
319
  prompt=SYSTEM_PROMPT_JSON_CORRECTOR
320
  )
321
 
322
+ # # adding the new embedding models:
323
+ # def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
324
+ # """Lazy-initialize DINOv2 processor & model (call once before embedding)."""
325
+ # global _dinov2_processor, _dinov2_model
326
+ # if _dinov2_processor is None or _dinov2_model is None:
327
+ # # _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
328
+ # _dinov2_processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)
329
+ # _dinov2_model = AutoModel.from_pretrained(model_name)
330
+ # _dinov2_model.eval().to(device)
331
+
332
+ # def embed_bytesio_list(bytesio_list, batch_size: int = 8):
333
+ # """
334
+ # Accepts a list of BytesIO objects (each contains an image, like your sprite_images_bytes).
335
+ # Returns: np.ndarray shape (N, D) of L2-normalized embeddings (dtype float32).
336
+ # """
337
+ # if _dinov2_processor is None or _dinov2_model is None:
338
+ # init_dinov2()
339
+
340
+ # imgs = [Image.open(b).convert("RGB") for b in bytesio_list]
341
+ # embs = []
342
+ # for i in range(0, len(imgs), batch_size):
343
+ # batch = imgs[i : i + batch_size]
344
+ # inputs = _dinov2_processor(images=batch, return_tensors="pt")
345
+ # inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
346
+ # with torch.no_grad():
347
+ # out = _dinov2_model(**inputs)
348
+ # # global image embedding from CLS token
349
+ # cls = out.last_hidden_state[:, 0, :] # (B, D)
350
+ # cls = torch.nn.functional.normalize(cls, p=2, dim=1) # L2 normalize rows
351
+ # embs.append(cls.cpu().numpy())
352
+ # if not embs:
353
+ # return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)
354
+ # return np.vstack(embs).astype(np.float32)
355
+
356
+ # def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
357
+ # norm = np.linalg.norm(a, axis=1, keepdims=True)
358
+ # return a / (norm + eps)
359
 
360
  # Helper function to load the block catalog from a JSON file
361
  def _load_block_catalog(block_type: str) -> Dict:
 
921
  # otherwise return original with its mime prefix (ensure prefix exists)
922
  return f"data:{mime};base64,{clean_b64}"
923
 
 
924
  SCRATCH_OPCODES = [
925
  'motion_movesteps', 'motion_turnright', 'motion_turnleft', 'motion_goto',
926
  'motion_gotoxy', 'motion_glideto', 'motion_glidesecstoxy', 'motion_pointindirection',
 
3381
  CODE_BLOCKS_DIR / "script5.jpg",
3382
  CODE_BLOCKS_DIR / "script6.jpg"]
3383
  folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
 
3384
 
3385
+
3386
+ # ============================== #
3387
+ # EMBED SPRITE IMAGES #
3388
+ # (using CLIP again) #
3389
+ # ============================== #
3390
+
3391
+ # Make sure all buffers are at start
3392
+ for buf in sprite_images_bytes:
3393
+ try:
3394
+ buf.seek(0)
3395
+ except Exception:
3396
+ pass
3397
+
3398
+ # Try the fast path: embed whole list at once (many CLIP wrappers accept a list of BytesIO/PIL)
3399
+ try:
3400
+ sprite_matrix = clip_embd.embed_image(sprite_images_bytes, batch_size=8)
3401
+ sprite_matrix = np.array(sprite_matrix, dtype=np.float32)
3402
+ except Exception:
3403
+ sprite_feats = []
3404
+ for buf in sprite_images_bytes:
3405
+ buf.seek(0)
3406
+ try:
3407
+ feats = clip_embd.embed_image([buf])[0]
3408
+ except Exception:
3409
+ buf.seek(0)
3410
+ pil_img = Image.open(buf).convert("RGB")
3411
+ try:
3412
+ feats = clip_embd.embed_image([pil_img])[0]
3413
+ except Exception:
3414
+ pil_arr = np.array(pil_img)
3415
+ feats = clip_embd.embed_image([pil_arr])[0]
3416
+ sprite_feats.append(np.asarray(feats, dtype=np.float32))
3417
+ sprite_matrix = np.vstack(sprite_feats) # shape (N, D)
3418
+
3419
+ # --- load reference embeddings (unchanged) ---
3420
+ with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
3421
  embedding_json = json.load(f)
3422
 
3423
+ img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
3424
+
3425
+
3426
+ # =========================================
3427
+
3428
+ # # -----------------------------------------
3429
+ # # Load reference embeddings from JSON
3430
+ # # -----------------------------------------
3431
+ # with open(f"{BLOCKS_DIR}/embed.json", "r") as f:
3432
+ # embedding_json = json.load(f)
3433
+
3434
  # =========================================
3435
  # Decode & embed each sprite image
3436
  # =========================================
3437
+ # # ============================== #
3438
+ # # EMBED SPRITE IMAGES #
3439
+ # # ============================== #
3440
  # sprite_features = []
3441
  # for b64 in sprite_base64:
3442
+ # if "," in b64: # strip data URI prefix if present
3443
  # b64 = b64.split(",", 1)[1]
3444
+
3445
  # img_bytes = base64.b64decode(b64)
3446
  # pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
3447
+
3448
+ # # optional re-encode to PNG for CLIP
3449
  # buf = BytesIO()
3450
  # pil_img.save(buf, format="PNG")
3451
  # buf.seek(0)
3452
+
3453
+ # feats = clip_embd.embed_image([buf])[0] # extract CLIP embedding
3454
  # sprite_features.append(feats)
3455
+
3456
+ # sprite_matrix = np.array(sprite_features, dtype=np.float32)
3457
+ # # ============================== #
3458
+ # # EMBED SPRITE IMAGES #
3459
+ # # ============================== #
3460
+ # # ensure model is initialized (fast no-op after first call)
3461
+ # init_dinov2()
3462
 
3463
+ # # embed the incoming sprite BytesIO images (same data structure you already use)
3464
+ # sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8) # shape (N, D)
 
 
 
 
 
 
3465
 
3466
+ # # load reference embeddings from JSON (they must be numeric lists)
3467
+ # img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
3468
 
3469
  # normalize both sides (important — stored embeddings may not be normalized)
3470
+
3471
+ def l2_normalize_rows(x: np.ndarray, eps: float = 1e-10) -> np.ndarray:
3472
+ """
3473
+ L2-normalize each row of a 2D numpy array.
3474
+
3475
+ Args:
3476
+ x: Array of shape (N, D).
3477
+ eps: Small constant to avoid division by zero.
3478
+
3479
+ Returns:
3480
+ Normalized array of shape (N, D) where each row has unit norm.
3481
+ """
3482
+ norms = np.linalg.norm(x, axis=1, keepdims=True)
3483
+ return x / np.maximum(norms, eps)
3484
+
3485
  sprite_matrix = l2_normalize_rows(sprite_matrix)
3486
  img_matrix = l2_normalize_rows(img_matrix)
3487