|
|
import json
import os

# Keep every Hugging Face / sentence-transformers download inside ./cache.
# These variables are exported BEFORE the third-party imports below because
# the Hugging Face libraries resolve their cache locations when they are first
# imported; setting them afterwards would have no effect on those defaults.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "./cache"

import faiss  # noqa: E402  (must follow the cache configuration above)
import numpy as np  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
|
|
|
|
|
|
|
|
# Load the product catalogue. The entries are used in file order further down,
# and each one is expected to expose a "name" key.
with open("products.json", "r", encoding="utf-8") as f:
    products = json.load(f)

print(f"📦 Loaded {len(products)} products from products.json")
|
|
|
|
|
|
|
|
print("🧠 Loading CLIP model...")
# cache_folder is passed explicitly so the model files are stored under ./cache
# regardless of how the environment is configured.
model = SentenceTransformer(
    "sentence-transformers/clip-ViT-B-32",
    cache_folder="./cache",
)
|
|
|
|
|
|
|
|
print("🔄 Encoding product features...")
# Only the product name is embedded; the list order mirrors `products`.
product_names = [p["name"] for p in products]
if not product_names:
    # Fail with a clear message instead of an obscure shape error later on,
    # when the embedding dimension is read from an empty result.
    raise ValueError("products.json contains no products; nothing to index")

embeddings = model.encode(
    product_names,
    convert_to_numpy=True,
    show_progress_bar=True,
)
|
|
|
|
|
|
|
|
# FAISS consumes C-contiguous float32 matrices; coerce explicitly so the index
# build does not depend on the dtype/layout the encoder happens to return.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# One row per product, one column per embedding component.
dimension = embeddings.shape[1]

# Flat L2 index; vectors are added in product order, so the row position of a
# vector in the index is the position of its product in `products`.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
|
|
|
|
|
|
|
|
# Persist the index so it can be loaded later without re-encoding the products.
faiss.write_index(index, "products.index")
print("✅ Saved FAISS index to products.index")
|
|
|
|
|
|
|
|
# Write the products back out in the same order they were embedded, so a list
# position in this file can be used to look up the product for an index row.
# ensure_ascii=False keeps non-ASCII product text readable in the output file.
with open("id_mapping.json", "w", encoding="utf-8") as f:
    json.dump(products, f, ensure_ascii=False, indent=2)

print("✅ Saved product ID mapping to id_mapping.json")
|
|
|