File size: 1,206 Bytes
3eaabcf
 
 
 
 
 
eb42a98
 
 
 
3eaabcf
eb42a98
 
3eaabcf
 
eb42a98
3eaabcf
 
eb42a98
 
3eaabcf
eb42a98
 
 
 
3eaabcf
eb42a98
 
 
 
3eaabcf
eb42a98
 
 
3eaabcf
eb42a98
 
 
3eaabcf
eb42a98
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Build a FAISS similarity index over product names.

Reads the product list from ``products.json``, encodes every product's
``name`` field with the ``clip-ViT-B-32`` SentenceTransformer model, stores
the vectors in a flat L2 FAISS index (``products.index``) and writes the
product list back out as ``id_mapping.json`` so that row ``i`` of the index
corresponds to entry ``i`` of the mapping.
"""
import json
import os

# Point the Hugging Face caches at a local, writable directory.
# This must run BEFORE sentence_transformers (and, through it, transformers /
# huggingface_hub) is imported: those libraries resolve their cache locations
# from the environment at import time, so assignments made afterwards are
# not picked up.
CACHE_DIR = "./cache"
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["SENTENCE_TRANSFORMERS_HOME"] = CACHE_DIR

import faiss  # noqa: E402  (deliberately imported after the cache env setup)
import numpy as np  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402

PRODUCTS_PATH = "products.json"
INDEX_PATH = "products.index"
MAPPING_PATH = "id_mapping.json"
MODEL_NAME = "sentence-transformers/clip-ViT-B-32"

# Load products
with open(PRODUCTS_PATH, "r", encoding="utf-8") as f:
    products = json.load(f)

print(f"📦 Loaded {len(products)} products from products.json")

# An empty catalogue yields an embedding array without a second axis, which
# would otherwise surface as an opaque IndexError when reading the dimension.
if not products:
    raise SystemExit("products.json contains no products; nothing to index.")

# Load CLIP model
print("🧠 Loading CLIP model...")
model = SentenceTransformer(MODEL_NAME, cache_folder=CACHE_DIR)

# Encode product names
print("🔎 Encoding product features...")
product_names = [p["name"] for p in products]
embeddings = model.encode(product_names, convert_to_numpy=True, show_progress_bar=True)
# FAISS operates on C-contiguous float32 matrices; make that explicit rather
# than relying on whatever dtype/layout the encoder happens to return.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Build FAISS index (exact, brute-force L2 search over all vectors)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Save index
faiss.write_index(index, INDEX_PATH)
print("✅ Saved FAISS index to products.index")

# Save mapping: the full product list, in the same order as the index rows.
with open(MAPPING_PATH, "w", encoding="utf-8") as f:
    json.dump(products, f, ensure_ascii=False, indent=2)

print("✅ Saved product ID mapping to id_mapping.json")