# Visual_product_matcher / build_index.py
# Source: Hugging Face file viewer (raw / history / blame), file size 1.21 kB.
# Last commit: eb42a98 (verified) "Update build_index.py" by Yashashvibhardwaj.
"""Build a FAISS index over product names.

Reads ``products.json``, encodes each product's ``name`` with the
``sentence-transformers/clip-ViT-B-32`` model, stores the vectors in a flat
L2 FAISS index written to ``products.index``, and writes the product list to
``id_mapping.json``. Row ``i`` of the index corresponds to entry ``i`` of the
saved product list.
"""
import json
import os

# Redirect the Hugging Face caches to a local directory. This has to happen
# BEFORE sentence_transformers is imported: that import pulls in
# huggingface_hub / transformers, which resolve their cache locations from the
# environment at import time, so assignments made afterwards come too late.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "./cache"

import faiss  # noqa: E402  (must follow the cache environment setup above)
import numpy as np  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402

# Load products
with open("products.json", "r", encoding="utf-8") as f:
    products = json.load(f)
print(f"📦 Loaded {len(products)} products from products.json")

# An empty catalogue yields no embedding matrix to read a dimension from;
# stop with a clear message instead of failing later on `embeddings.shape[1]`.
if not products:
    raise SystemExit("products.json contains no products; nothing to index")

# Load CLIP model
print("🧠 Loading CLIP model...")
model = SentenceTransformer(
    "sentence-transformers/clip-ViT-B-32",
    cache_folder="./cache",
)

# Encode product names
print("🔎 Encoding product features...")
product_names = [p["name"] for p in products]
embeddings = model.encode(
    product_names,
    convert_to_numpy=True,
    show_progress_bar=True,
)
# FAISS expects a C-contiguous float32 matrix of shape (n_vectors, dimension).
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Build FAISS index (exact search, L2 distance)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Save index
faiss.write_index(index, "products.index")
print("✅ Saved FAISS index to products.index")

# Save mapping: the products are written in the same order their names were
# added to the index, so an index row number can be used as a list position.
with open("id_mapping.json", "w", encoding="utf-8") as f:
    json.dump(products, f, ensure_ascii=False, indent=2)
print("✅ Saved product ID mapping to id_mapping.json")