"""Build a FAISS index over product names using CLIP text embeddings.

Pipeline:
  1. Load the product records from ``products.json``.
  2. Encode every product's ``"name"`` with the ``clip-ViT-B-32`` model.
  3. Add the embeddings to an exact L2 FAISS index and save it to
     ``products.index``.
  4. Write the product list to ``id_mapping.json``. Vectors are added in the
     same order as the product list, so the list serves as the
     position-to-product mapping for the index.
"""
import json
import os

# Redirect Hugging Face caching to a local, writable directory.
# These variables must be set BEFORE importing sentence_transformers: the
# Hugging Face libraries resolve their cache paths when first imported, so
# assigning them afterwards is too late to take effect.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "./cache"

import faiss  # noqa: E402  (deliberately imported after the env setup above)
import numpy as np  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402

# Load products. Each record is expected to provide a "name" key (used below).
with open("products.json", "r", encoding="utf-8") as f:
    products = json.load(f)
print(f"📦 Loaded {len(products)} products from products.json")

# Fail early with a clear message: with no products there are no embeddings
# to take a dimension from, and the script would otherwise die later with an
# obscure IndexError on ``embeddings.shape[1]``.
if not products:
    raise ValueError("products.json contains no products; nothing to index")

# Load CLIP model
print("🧠 Loading CLIP model...")
model = SentenceTransformer(
    "sentence-transformers/clip-ViT-B-32",
    cache_folder="./cache",
)

# Encode product names (a missing "name" key raises KeyError here).
print("🔎 Encoding product features...")
product_names = [p["name"] for p in products]
embeddings = model.encode(
    product_names,
    convert_to_numpy=True,
    show_progress_bar=True,
)
# FAISS expects C-contiguous float32 input; enforce it rather than relying on
# whatever dtype/layout the encoder happens to return.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Build FAISS index (exact, brute-force L2 search).
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Save index
faiss.write_index(index, "products.index")
print("✅ Saved FAISS index to products.index")

# Save mapping: the product list in the same order the vectors were added.
with open("id_mapping.json", "w", encoding="utf-8") as f:
    json.dump(products, f, ensure_ascii=False, indent=2)
print("✅ Saved product ID mapping to id_mapping.json")