Spaces:

Yashashvibhardwaj
/

Visual_product_matcher

Sleeping

App Files Files Community

Yashashvibhardwaj committed on Aug 31

Commit

3eaabcf

1 Parent(s): 9293eee

Deploy backend code

Browse files

Files changed (5) hide show

.gitignore +2 -0
DOCKERFILE +15 -0
build_index.py +80 -0
main.py +149 -0
requirements.txt +6 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ products.json
2	+ products.index

DOCKERFILE ADDED Viewed

	@@ -0,0 +1,15 @@

+FROM python:3.11-slim
+# Build tooling and download utilities; drop the apt package lists afterwards
+# so they do not bloat the image layer.
+RUN apt-get update && apt-get install -y git wget curl build-essential \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install dependencies before copying the sources so this layer stays cached
+# when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+EXPOSE 7860
+# Build index if missing, then run FastAPI.
+# build_index.py writes products.index next to itself (/app); it is only run
+# when that file is absent, and uvicorn starts only if the build succeeded.
+CMD ["bash", "-c", "([ -f products.index ] || python build_index.py) && uvicorn main:app --host 0.0.0.0 --port 7860"]

build_index.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import json
+import requests
+import io
+import faiss
+import numpy as np
+from PIL import Image
+from sentence_transformers import SentenceTransformer
+from tqdm import tqdm  # progress bar
+# ---------------------------------------------------
+# Locate products.json in the same folder as this script
+# ---------------------------------------------------
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+PRODUCTS_FILE = os.path.join(BASE_DIR, "products.json")
+INDEX_FILE = os.path.join(BASE_DIR, "products.index")
+# ---------------------------------------------------
+# Load product metadata
+# ---------------------------------------------------
+if not os.path.exists(PRODUCTS_FILE):
+    raise FileNotFoundError(f"❌ Could not find {PRODUCTS_FILE}")
+with open(PRODUCTS_FILE, "r", encoding="utf-8") as f:
+    products = json.load(f)
+print(f"📦 Loaded {len(products)} products from {PRODUCTS_FILE}")
+# ---------------------------------------------------
+# Load CLIP model
+# ---------------------------------------------------
+print("🧠 Loading CLIP model (this may take a few seconds)...")
+model = SentenceTransformer("clip-ViT-B-32")
+# ---------------------------------------------------
+# Collect unique image URLs (avoid redundant downloads)
+# ---------------------------------------------------
+unique_urls = list({p["image_url"] for p in products})
+print(f"🔗 Found {len(unique_urls)} unique image URLs")
+# ---------------------------------------------------
+# Compute embeddings for unique URLs
+# ---------------------------------------------------
+url_to_emb = {}
+for url in tqdm(unique_urls, desc="Embedding unique images"):
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        img = Image.open(io.BytesIO(response.content)).convert("RGB")
+        emb = model.encode(img, convert_to_numpy=True,
+                           normalize_embeddings=True)
+        url_to_emb[url] = emb
+    except Exception as e:
+        print(f"⚠️ Error processing {url}: {e}")
+        url_to_emb[url] = np.zeros(512, dtype=np.float32)  # fallback embedding
+# ---------------------------------------------------
+# Build embeddings array for all products
+# ---------------------------------------------------
+embeddings = []
+for p in products:
+    embeddings.append(url_to_emb[p["image_url"]])
+embeddings = np.array(embeddings).astype("float32")
+print(f"✅ Built embeddings array: {embeddings.shape}")
+# ---------------------------------------------------
+# Create FAISS index (cosine similarity via inner product)
+# ---------------------------------------------------
+dim = embeddings.shape[1]  # 512 for CLIP
+index = faiss.IndexFlatIP(dim)
+index.add(embeddings)
+# ---------------------------------------------------
+# Save FAISS index
+# ---------------------------------------------------
+faiss.write_index(index, INDEX_FILE)
+print(f"🎉 Saved FAISS index with {index.ntotal} vectors → {INDEX_FILE}")

main.py ADDED Viewed

	@@ -0,0 +1,149 @@

+from fastapi import FastAPI, UploadFile, Form
+from fastapi.middleware.cors import CORSMiddleware
+import requests
+import io
+import faiss
+import json
+import os
+import numpy as np
+from PIL import Image
+from sentence_transformers import SentenceTransformer
+# Init FastAPI
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # you can restrict to your Vercel URL later
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+# Load CLIP model once
+print("🧠 Loading CLIP model...")
+model = SentenceTransformer("clip-ViT-B-32")
+# Load dataset
+PRODUCTS_FILE = "products.json"
+INDEX_FILE = "products.index"
+with open(PRODUCTS_FILE, "r", encoding="utf-8", errors="ignore") as f:
+    products = json.load(f)
+# Build or load FAISS index
+if os.path.exists(INDEX_FILE):
+    print("📦 Loading existing FAISS index...")
+    index = faiss.read_index(INDEX_FILE)
+else:
+    print("⚡ Building FAISS index from products.json (first startup only)...")
+    # Encode product names (lightweight, avoids downloading images)
+    texts = [p["name"] + " " + p["category"] + " " + p["brand"]
+             for p in products]
+    embeddings = model.encode(
+        texts, convert_to_numpy=True, normalize_embeddings=True)
+    index = faiss.IndexFlatIP(embeddings.shape[1])
+    index.add(embeddings.astype("float32"))
+    faiss.write_index(index, INDEX_FILE)
+    print(f"✅ Saved FAISS index with {index.ntotal} vectors")
+def embed_image(img: Image.Image):
+    return model.encode(img, convert_to_numpy=True, normalize_embeddings=True)
+def embed_text(query: str):
+    return model.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
+@app.post("/match")
+async def match(
+    file: UploadFile = None,
+    image_url: str = Form(None),
+    min_score: float = Form(0.6),
+    top_k: int = Form(60),
+    categories: str = Form(None),
+    brands: str = Form(None),
+    min_price: float = Form(0),
+    max_price: float = Form(9999)
+):
+    try:
+        # Get query image
+        if file:
+            img = Image.open(io.BytesIO(await file.read())).convert("RGB")
+        elif image_url:
+            img = Image.open(io.BytesIO(requests.get(
+                image_url).content)).convert("RGB")
+        else:
+            return {"matches": []}
+        # Encode query
+        q_emb = embed_image(img).reshape(1, -1)
+        # Search FAISS
+        scores, ids = index.search(q_emb, top_k)
+        # Parse filters
+        categories = json.loads(categories) if categories else []
+        brands = json.loads(brands) if brands else []
+        # Collect results
+        results = []
+        for score, idx in zip(scores[0], ids[0]):
+            if score < min_score:
+                continue
+            p = products[idx]
+            # Apply filters
+            if categories and p["category"] not in categories:
+                continue
+            if brands and p["brand"] not in brands:
+                continue
+            if not (min_price <= p["price"] <= max_price):
+                continue
+            results.append({**p, "score": float(score)})
+        return {"matches": results}
+    except Exception as e:
+        return {"error": str(e)}
+@app.post("/search_text")
+async def search_text(
+    query: str = Form(...),
+    min_score: float = Form(0.6),
+    top_k: int = Form(60),
+    categories: str = Form(None),
+    brands: str = Form(None),
+    min_price: float = Form(0),
+    max_price: float = Form(9999)
+):
+    try:
+        # Encode text query
+        q_emb = embed_text(query).reshape(1, -1)
+        # Search FAISS
+        scores, ids = index.search(q_emb, top_k)
+        # Parse filters
+        categories = json.loads(categories) if categories else []
+        brands = json.loads(brands) if brands else []
+        # Collect results
+        results = []
+        for score, idx in zip(scores[0], ids[0]):
+            if score < min_score:
+                continue
+            p = products[idx]
+            # Apply filters
+            if categories and p["category"] not in categories:
+                continue
+            if brands and p["brand"] not in brands:
+                continue
+            if not (min_price <= p["price"] <= max_price):
+                continue
+            results.append({**p, "score": float(score)})
+        return {"matches": results}
+    except Exception as e:
+        return {"error": str(e)}

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+fastapi
+uvicorn
+# Required by FastAPI to parse Form(...) fields and UploadFile uploads
+python-multipart
+faiss-cpu
+sentence-transformers
+pillow
+requests
+# Imported directly by build_index.py / main.py
+numpy
+tqdm