"""Build a FAISS cosine-similarity index over product names.

Reads ``products.json``, embeds each product's ``name`` with the
``sentence-transformers/clip-ViT-B-32`` model, stores the L2-normalized
embeddings in an inner-product FAISS index (``products.index``), and writes
the product list back out as ``id_mapping.json``. The products are encoded
and added to the index in file order, so row ``i`` of the index corresponds
to entry ``i`` of ``id_mapping.json``.
"""
import json
import os

# Fix caching permissions for Hugging Face.
# These must be set BEFORE the Hugging Face libraries are imported: they
# resolve their cache locations when first imported, so assigning the
# variables after the import leaves the default (possibly unwritable) cache
# directory in effect.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "./cache"

import faiss  # noqa: E402  (deliberately imported after the cache env setup)
import numpy as np  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402

# Load products
with open("products.json", "r", encoding="utf-8") as f:
    products = json.load(f)
print(f"📦 Loaded {len(products)} products from products.json")

# Fail early, before the expensive model load, instead of crashing later with
# an opaque IndexError when the embedding matrix has no second dimension.
if not products:
    raise ValueError("products.json contains no products; nothing to index")

# Load CLIP model
print("🧠 Loading CLIP model...")
model = SentenceTransformer(
    "sentence-transformers/clip-ViT-B-32", cache_folder="./cache"
)

# Encode product names (normalize for cosine sim)
print("🔎 Encoding product features...")
product_names = [p["name"] for p in products]
embeddings = model.encode(
    product_names,
    convert_to_numpy=True,
    show_progress_bar=True,
    normalize_embeddings=True,  # important for cosine similarity
)

# FAISS requires a C-contiguous float32 matrix of shape (n, dimension); make
# that explicit rather than relying on the encoder's output layout. This is a
# no-op when the array already satisfies both conditions.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Build FAISS index with cosine similarity (inner product on normalized vectors)
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

# Save index
faiss.write_index(index, "products.index")
print("✅ Saved FAISS index to products.index")

# Save mapping
with open("id_mapping.json", "w", encoding="utf-8") as f:
    json.dump(products, f, ensure_ascii=False, indent=2)
print("✅ Saved product ID mapping to id_mapping.json")