Update app.py
app.py
CHANGED
@@ -30,43 +30,43 @@ from difflib import get_close_matches
 import torch
 from transformers import AutoImageProcessor, AutoModel
 
-# --- Config (tune threads as needed) ---
-DINOV2_MODEL = "facebook/dinov2-small"  # small = best CPU latency/quality tradeoff
-DEVICE = torch.device("cpu")
-torch.set_num_threads(4)  # tune for your CPU
-
-# --- Globals for single-shot model load ---
-_dinov2_processor = None
-_dinov2_model = None
-
-os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
-class ChatOpenRouter(ChatOpenAI):
-    openai_api_key: Optional[SecretStr] = Field(
-        alias="api_key",
-        default_factory=secret_from_env("OPENROUTER_API_KEY", default=None),
-    )
-    @property
-    def lc_secrets(self) -> dict[str, str]:
-        return {"openai_api_key": "OPENROUTER_API_KEY"}
+# # --- Config (tune threads as needed) ---
+# DINOV2_MODEL = "facebook/dinov2-small"  # small = best CPU latency/quality tradeoff
+# DEVICE = torch.device("cpu")
+# torch.set_num_threads(4)  # tune for your CPU
+
+# # --- Globals for single-shot model load ---
+# _dinov2_processor = None
+# _dinov2_model = None
+
+# os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "default_key_or_placeholder")
+# class ChatOpenRouter(ChatOpenAI):
+#     openai_api_key: Optional[SecretStr] = Field(
+#         alias="api_key",
+#         default_factory=secret_from_env("OPENROUTER_API_KEY", default=None),
+#     )
+#     @property
+#     def lc_secrets(self) -> dict[str, str]:
+#         return {"openai_api_key": "OPENROUTER_API_KEY"}
 
-    def __init__(self,
-                 openai_api_key: Optional[str] = None,
-                 **kwargs):
-        openai_api_key = (
-            openai_api_key or os.environ.get("OPENROUTER_API_KEY")
-        )
-        super().__init__(
-            base_url="https://openrouter.ai/api/v1",
-            openai_api_key=openai_api_key,
-            **kwargs
-        )
+#     def __init__(self,
+#                  openai_api_key: Optional[str] = None,
+#                  **kwargs):
+#         openai_api_key = (
+#             openai_api_key or os.environ.get("OPENROUTER_API_KEY")
+#         )
+#         super().__init__(
+#             base_url="https://openrouter.ai/api/v1",
+#             openai_api_key=openai_api_key,
+#             **kwargs
+#         )
 
-llm2 = ChatOpenRouter(
-    #model_name="deepseek/deepseek-r1-0528:free",
-    #model_name="google/gemini-2.0-flash-exp:free",
-    #model_name="deepseek/deepseek-v3-base:free",
-    model_name="deepseek/deepseek-r1:free"
-)
+# llm2 = ChatOpenRouter(
+#     #model_name="deepseek/deepseek-r1-0528:free",
+#     #model_name="google/gemini-2.0-flash-exp:free",
+#     #model_name="deepseek/deepseek-v3-base:free",
+#     model_name="deepseek/deepseek-r1:free"
+# )
 
 
 def log_execution_time(func):
@@ -79,7 +79,7 @@ def log_execution_time(func):
         return result
     return wrapper
 
-global pdf_doc
+# global pdf_doc
 # ============================== #
 #   INITIALIZE CLIP EMBEDDER    #
 # ============================== #
@@ -319,43 +319,43 @@ agent_json_resolver = create_react_agent(
     prompt=SYSTEM_PROMPT_JSON_CORRECTOR
 )
 
-# adding the new embedding models:
-def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
-    """Lazy-initialize DINOv2 processor & model (call once before embedding)."""
-    global _dinov2_processor, _dinov2_model
-    if _dinov2_processor is None or _dinov2_model is None:
-        # _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
-        _dinov2_processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)
-        _dinov2_model = AutoModel.from_pretrained(model_name)
-        _dinov2_model.eval().to(device)
-
-def embed_bytesio_list(bytesio_list, batch_size: int = 8):
-    """
-    Accepts a list of BytesIO objects (each contains an image, like your sprite_images_bytes).
-    Returns: np.ndarray shape (N, D) of L2-normalized embeddings (dtype float32).
-    """
-    if _dinov2_processor is None or _dinov2_model is None:
-        init_dinov2()
-
-    imgs = [Image.open(b).convert("RGB") for b in bytesio_list]
-    embs = []
-    for i in range(0, len(imgs), batch_size):
-        batch = imgs[i : i + batch_size]
-        inputs = _dinov2_processor(images=batch, return_tensors="pt")
-        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
-        with torch.no_grad():
-            out = _dinov2_model(**inputs)
-        # global image embedding from CLS token
-        cls = out.last_hidden_state[:, 0, :]  # (B, D)
-        cls = torch.nn.functional.normalize(cls, p=2, dim=1)  # L2 normalize rows
-        embs.append(cls.cpu().numpy())
-    if not embs:
-        return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)
-    return np.vstack(embs).astype(np.float32)
-
-def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
-    norm = np.linalg.norm(a, axis=1, keepdims=True)
-    return a / (norm + eps)
+# # adding the new embedding models:
+# def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
+#     """Lazy-initialize DINOv2 processor & model (call once before embedding)."""
+#     global _dinov2_processor, _dinov2_model
+#     if _dinov2_processor is None or _dinov2_model is None:
+#         # _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
+#         _dinov2_processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)
+#         _dinov2_model = AutoModel.from_pretrained(model_name)
+#         _dinov2_model.eval().to(device)
+
+# def embed_bytesio_list(bytesio_list, batch_size: int = 8):
+#     """
+#     Accepts a list of BytesIO objects (each contains an image, like your sprite_images_bytes).
+#     Returns: np.ndarray shape (N, D) of L2-normalized embeddings (dtype float32).
+#     """
+#     if _dinov2_processor is None or _dinov2_model is None:
+#         init_dinov2()
+
+#     imgs = [Image.open(b).convert("RGB") for b in bytesio_list]
+#     embs = []
+#     for i in range(0, len(imgs), batch_size):
+#         batch = imgs[i : i + batch_size]
+#         inputs = _dinov2_processor(images=batch, return_tensors="pt")
+#         inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
+#         with torch.no_grad():
+#             out = _dinov2_model(**inputs)
+#         # global image embedding from CLS token
+#         cls = out.last_hidden_state[:, 0, :]  # (B, D)
+#         cls = torch.nn.functional.normalize(cls, p=2, dim=1)  # L2 normalize rows
+#         embs.append(cls.cpu().numpy())
+#     if not embs:
+#         return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)
+#     return np.vstack(embs).astype(np.float32)
+
+# def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
+#     norm = np.linalg.norm(a, axis=1, keepdims=True)
+#     return a / (norm + eps)
 
 # Helper function to load the block catalog from a JSON file
 def _load_block_catalog(block_type: str) -> Dict:
@@ -921,7 +921,6 @@ def clean_base64_for_model(raw_b64, max_bytes_threshold=4000000) -> str:
     # otherwise return original with its mime prefix (ensure prefix exists)
     return f"data:{mime};base64,{clean_b64}"
 
-
 SCRATCH_OPCODES = [
     'motion_movesteps', 'motion_turnright', 'motion_turnleft', 'motion_goto',
     'motion_gotoxy', 'motion_glideto', 'motion_glidesecstoxy', 'motion_pointindirection',
@@ -3382,43 +3381,107 @@ SPRITE_DIR / "Abby.sprite3" / "34a175600dc009a521eb46fdbbbeeb67.png"
     CODE_BLOCKS_DIR / "script5.jpg",
     CODE_BLOCKS_DIR / "script6.jpg"]
 folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
-# =========================================
 
-# -----------------------------------------
-# Load reference embeddings from JSON
-# -----------------------------------------
-with open(f"{BLOCKS_DIR}/embed.json", "r") as f:
+
+# ============================== #
+#      EMBED SPRITE IMAGES      #
+#      (using CLIP again)       #
+# ============================== #
+
+# Make sure all buffers are at start
+for buf in sprite_images_bytes:
+    try:
+        buf.seek(0)
+    except Exception:
+        pass
+
+# Try the fast path: embed whole list at once (many CLIP wrappers accept a list of BytesIO/PIL)
+try:
+    sprite_matrix = clip_embd.embed_image(sprite_images_bytes, batch_size=8)
+    sprite_matrix = np.array(sprite_matrix, dtype=np.float32)
+except Exception:
+    sprite_feats = []
+    for buf in sprite_images_bytes:
+        buf.seek(0)
+        try:
+            feats = clip_embd.embed_image([buf])[0]
+        except Exception:
+            buf.seek(0)
+            pil_img = Image.open(buf).convert("RGB")
+            try:
+                feats = clip_embd.embed_image([pil_img])[0]
+            except Exception:
+                pil_arr = np.array(pil_img)
+                feats = clip_embd.embed_image([pil_arr])[0]
+        sprite_feats.append(np.asarray(feats, dtype=np.float32))
+    sprite_matrix = np.vstack(sprite_feats)  # shape (N, D)
+
+# --- load reference embeddings (unchanged) ---
+with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "r") as f:
     embedding_json = json.load(f)
 
+img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
+
+
+# =========================================
+
+# # -----------------------------------------
+# # Load reference embeddings from JSON
+# # -----------------------------------------
+# with open(f"{BLOCKS_DIR}/embed.json", "r") as f:
+#     embedding_json = json.load(f)
+
 # =========================================
 # Decode & embed each sprite image
 # =========================================
+# # ============================== #
+# #      EMBED SPRITE IMAGES      #
+# # ============================== #
 # sprite_features = []
 # for b64 in sprite_base64:
-#     if "," in b64:
+#     if "," in b64:  # strip data URI prefix if present
 #         b64 = b64.split(",", 1)[1]
-
+
 #     img_bytes = base64.b64decode(b64)
 #     pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
+
+#     # optional re-encode to PNG for CLIP
 #     buf = BytesIO()
 #     pil_img.save(buf, format="PNG")
 #     buf.seek(0)
-
+
+#     feats = clip_embd.embed_image([buf])[0]  # extract CLIP embedding
 #     sprite_features.append(feats)
+
+# sprite_matrix = np.array(sprite_features, dtype=np.float32)
+# # ============================== #
+# #      EMBED SPRITE IMAGES      #
+# # ============================== #
+# # ensure model is initialized (fast no-op after first call)
+# init_dinov2()
 
-# ============================== #
-#      EMBED SPRITE IMAGES      #
-# ============================== #
-# ensure model is initialized (fast no-op after first call)
-init_dinov2()
-
-# embed the incoming sprite BytesIO images (same data structure you already use)
-sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8)  # shape (N, D)
+# # embed the incoming sprite BytesIO images (same data structure you already use)
+# sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8)  # shape (N, D)
 
-# load reference embeddings from JSON (they must be numeric lists)
-img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
+# # load reference embeddings from JSON (they must be numeric lists)
+# img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
 
 # normalize both sides (important — stored embeddings may not be normalized)
+
+def l2_normalize_rows(x: np.ndarray, eps: float = 1e-10) -> np.ndarray:
+    """
+    L2-normalize each row of a 2D numpy array.
+
+    Args:
+        x: Array of shape (N, D).
+        eps: Small constant to avoid division by zero.
+
+    Returns:
+        Normalized array of shape (N, D) where each row has unit norm.
+    """
+    norms = np.linalg.norm(x, axis=1, keepdims=True)
+    return x / np.maximum(norms, eps)
+
 sprite_matrix = l2_normalize_rows(sprite_matrix)
 img_matrix = l2_normalize_rows(img_matrix)
 
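The diff stops at row normalization, which only pays off in a dot-product comparison: for L2-normalized rows, a dot product is exactly cosine similarity. A minimal sketch of the matching step this sets up, assuming `sprite_matrix` is (N, D) and `img_matrix` is (M, D) as above; `similarity`, `best_match`, and `best_score` are illustrative names, not from the commit:

import numpy as np

# Dot products of unit-norm rows are cosine similarities.
similarity = sprite_matrix @ img_matrix.T              # shape (N, M)

# For each sprite, the index and score of the closest reference image.
best_match = np.argmax(similarity, axis=1)             # shape (N,)
best_score = similarity[np.arange(len(best_match)), best_match]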
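For context, the loader above expects openclip_embeddings.json to be a list of records whose "embeddings" values are numeric lists. A hypothetical writer that would produce a file in that shape, reusing the diff's own clip_embd, folder_image_paths, and BLOCKS_DIR; the commit does not include this step, and the "path" field is purely illustrative:

import json
from io import BytesIO

records = []
for path in folder_image_paths:
    # Feed the embedder the same BytesIO input the diff uses.
    with open(path, "rb") as fh:
        buf = BytesIO(fh.read())
    feats = clip_embd.embed_image([buf])[0]
    records.append({"path": path, "embeddings": [float(v) for v in feats]})

with open(f"{BLOCKS_DIR}/openclip_embeddings.json", "w") as f:
    json.dump(records, f)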