File size: 1,206 Bytes
3eaabcf eb42a98 3eaabcf eb42a98 3eaabcf eb42a98 3eaabcf eb42a98 3eaabcf eb42a98 3eaabcf eb42a98 3eaabcf eb42a98 3eaabcf eb42a98 3eaabcf eb42a98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import json
import os

# Fix caching permissions for Hugging Face.
# These variables must be assigned BEFORE the Hugging Face libraries are
# imported: they resolve their cache locations from the environment when they
# are first imported, so setting the variables afterwards comes too late to
# redirect those paths into the writable ./cache directory.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "./cache"

# Third-party imports are deliberately placed after the environment setup.
import faiss  # noqa: E402
import numpy as np  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
# Load products
# The parsed JSON is kept as-is; the encoding step further down iterates over
# it and reads each record's "name" key.
with open("products.json", "r", encoding="utf-8") as f:
    products = json.load(f)
print(f"📦 Loaded {len(products)} products from products.json")
# Load CLIP model
# cache_folder points the model download at the local ./cache directory.
print("🧠 Loading CLIP model...")
model = SentenceTransformer("sentence-transformers/clip-ViT-B-32", cache_folder="./cache")
# Turn every product's name into an embedding vector (one row per product,
# in the same order as `products`).
# NOTE(review): the leading "π" in the message below looks like a mis-encoded
# emoji; it is kept unchanged because the intended character cannot be
# recovered from what is left.
print("π Encoding product features...")
product_names = [entry["name"] for entry in products]
embeddings = model.encode(
    product_names,
    show_progress_bar=True,
    convert_to_numpy=True,
)
# Build FAISS index
# FAISS expects a C-contiguous float32 matrix of shape (n_vectors, dimension);
# coerce explicitly rather than relying on the encoder's output layout.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
# An empty product list produces an array without a second axis, which would
# otherwise fail below with an opaque "tuple index out of range".
if embeddings.ndim != 2 or embeddings.shape[0] == 0:
    raise ValueError(
        "No product embeddings to index; check that products.json is not empty"
    )
dimension = embeddings.shape[1]
# Exact (brute-force) L2-distance index; vectors are stored in insertion order.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
# Save index
# Persist the populated index to disk next to the script.
faiss.write_index(index, "products.index")
print("✅ Saved FAISS index to products.index")
# Save mapping
# The product list is written back out verbatim; since the vectors were added
# to the index in this same order, a record's position in this file lines up
# with its row in the index.
with open("id_mapping.json", "w", encoding="utf-8") as f:
    json.dump(products, f, ensure_ascii=False, indent=2)
print("✅ Saved product ID mapping to id_mapping.json")
|