Spaces:
Sleeping
Sleeping
File size: 4,399 Bytes
c6706bd 1006fab c6706bd 1006fab c6706bd 1006fab c6706bd 1006fab c6706bd 1006fab c6706bd 4d4fccb c6706bd 4d4fccb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
"""Semantic search endpoint using FAISS"""
from fastapi import APIRouter, Query, Depends, HTTPException
from sqlmodel import Session, select
import numpy as np
from cloudzy.database import get_session
from cloudzy.models import Photo
from cloudzy.schemas import SearchResponse, SearchResult
from cloudzy.search_engine import SearchEngine
# from cloudzy.ai_utils import generate_filename_embedding
from cloudzy.ai_utils import ImageEmbeddingGenerator
import os
# Router that the search endpoint(s) in this module register on; it is
# created with the "search" tag.
router = APIRouter(tags=["search"])
@router.get("/search", response_model=SearchResponse)
async def search_photos(
    q: str = Query(..., min_length=1, max_length=200, description="Search query"),
    top_k: int = Query(5, ge=1, le=50, description="Number of results"),
    session: Session = Depends(get_session),
):
    """
    Semantic search for similar photos using FAISS.

    Converts the query text to an embedding, asks the search engine for the
    closest entries, and loads the matching photo rows from the database.

    Args:
        q: Search query (used to generate the embedding).
        top_k: Number of results to request from the search engine (1-50).
        session: Database session used to load photo details.

    Returns:
        SearchResponse echoing the query, with one SearchResult (including
        its distance score) per hit that still has a row in the database.
        Hits without a matching photo row are dropped, so fewer than
        ``top_k`` results may be returned.
    """
    generator = ImageEmbeddingGenerator()
    query_embedding = generator._embed_text(q)

    # Search in FAISS
    search_engine = SearchEngine()
    search_results = search_engine.search(query_embedding, top_k=top_k)

    if not search_results:
        return SearchResponse(
            query=q,
            results=[],
            total_results=0,
        )

    # An unset APP_DOMAIN must not be rendered as the literal text "None";
    # fall back to an empty base so the URL is relative instead.
    base_url = os.getenv("APP_DOMAIN") or ""
    # The base is joined directly with "uploads/", so make sure a separator
    # is present when the configured value lacks a trailing slash.
    if base_url and not base_url.endswith("/"):
        base_url += "/"

    # Fetch photo details from database
    result_objects = []
    for photo_id, distance in search_results:
        statement = select(Photo).where(Photo.id == photo_id)
        photo = session.exec(statement).first()
        if not photo:
            # The index returned an id with no row in the DB; skip it.
            continue
        result_objects.append(
            SearchResult(
                photo_id=photo.id,
                filename=photo.filename,
                image_url=f"{base_url}uploads/{photo.filename}",
                tags=photo.get_tags(),
                caption=photo.caption,
                distance=distance,
            )
        )

    return SearchResponse(
        query=q,
        results=result_objects,
        total_results=len(result_objects),
    )
# @router.post("/search/image-to-image")
# async def image_to_image_search(
# reference_photo_id: int = Query(..., description="Reference photo ID"),
# top_k: int = Query(5, ge=1, le=50),
# session: Session = Depends(get_session),
# ):
# """
# Find similar images to a reference photo (image-to-image search).
# Args:
# reference_photo_id: ID of the reference photo
# top_k: Number of similar results
# Returns: Similar photos
# """
# # Get reference photo
# statement = select(Photo).where(Photo.id == reference_photo_id)
# reference_photo = session.exec(statement).first()
# if not reference_photo:
# raise HTTPException(status_code=404, detail=f"Photo {reference_photo_id} not found")
# # Get reference embedding
# reference_embedding = reference_photo.get_embedding()
# if not reference_embedding:
# raise HTTPException(status_code=400, detail="Photo has no embedding")
# # Search in FAISS
# search_engine = SearchEngine()
# search_results = search_engine.search(
# np.array(reference_embedding, dtype=np.float32),
# top_k=top_k + 1 # +1 to skip the reference photo itself
# )
# # Build results (skip first result which is the reference photo itself)
# result_objects = []
# for photo_id, distance in search_results[1:]: # Skip first result
# statement = select(Photo).where(Photo.id == photo_id)
# photo = session.exec(statement).first()
# if photo:
# result_objects.append(
# SearchResult(
# photo_id=photo.id,
# filename=photo.filename,
# tags=photo.get_tags(),
# caption=photo.caption,
# distance=distance,
# )
# )
# return SearchResponse(
# query=f"Similar to photo {reference_photo_id}",
# results=result_objects[:top_k],
# total_results=len(result_objects),
# ) |