Commit 1006fab
Parent: 6365287
Added embedding

Files changed:
- .gitignore +3 -0
- app.py +2 -2
- cloudzy/agents/image_analyzer.py +91 -0
- cloudzy/ai_utils.py +64 -63
- cloudzy/routes/search.py +14 -3
- cloudzy/routes/upload.py +71 -5
- cloudzy/schemas.py +2 -0
- cloudzy/search_engine.py +23 -13
- requirements.txt +2 -1
.gitignore CHANGED
@@ -45,6 +45,9 @@ faiss_index.bin
 *.log
 logs/
 
+*.npy
+*.ids
+
 # Testing
 .pytest_cache/
 .coverage
app.py CHANGED
@@ -3,7 +3,7 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
 from fastapi.staticfiles import StaticFiles
-
+from dotenv import load_dotenv
 
 from cloudzy.database import create_db_and_tables
 from cloudzy.routes import upload, photo, search
@@ -12,7 +12,7 @@ import os
 
 # Initialize search engine at startup
 search_engine = None
-
+load_dotenv()
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
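With `load_dotenv()` called at import time, `HF_TOKEN_1` and `APP_DOMAIN` can be supplied from a local `.env` file. A minimal sketch of that file (the variable names come from this commit; the values are placeholders). Note that the routes added below build URLs as f"{APP_DOMAIN}uploads/...", with no separator inserted, so the value needs a trailing slash:

# .env (placeholder values)
HF_TOKEN_1=hf_xxxxxxxxxxxxxxxx
APP_DOMAIN=https://userx2000-cloudzy-ai-challenge.hf.space/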
cloudzy/agents/image_analyzer.py ADDED
@@ -0,0 +1,91 @@
+import os
+import json
+from openai import OpenAI
+
+
+from dotenv import load_dotenv
+load_dotenv()
+
+class ImageDescriber:
+    """
+    Class for generating descriptive metadata (tags, description, caption)
+    for an image using Hugging Face's inference endpoint via OpenAI client.
+    """
+
+    def __init__(self):
+        # Read token from environment variable
+        api_key = os.getenv("HF_TOKEN_1")
+        if not api_key:
+            raise ValueError("Environment variable HF_TOKEN_1 is not set.")
+
+        # Initialize client
+        self.client = OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=api_key,
+        )
+
+        # Model to use
+        self.model = "Qwen/Qwen3-VL-8B-Instruct:novita"
+
+    def describe_image(self, image_url: str) -> dict:
+        """
+        Sends the image to the model and returns a structured dictionary:
+        {
+            "tags": [...],
+            "description": "...",
+            "caption": "..."
+        }
+        """
+        # Prompt for structured output
+        prompt = """
+        Describe this image in the following exact format:
+
+        result: {
+            "tags": [list of tags related to the image],
+            "description": "a 10-line descriptive description for the image",
+            "caption": "a short description for the image"
+        }
+        """
+
+        # Send request
+        completion = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": image_url}},
+                    ],
+                }
+            ],
+        )
+
+        # Extract message text
+        message = completion.choices[0].message
+        text_content = message.content.strip()
+
+        # Try to extract JSON-like dict from model output
+        try:
+            start = text_content.index("{")
+            end = text_content.rindex("}") + 1
+            json_str = text_content[start:end]
+            result = json.loads(json_str)
+        except Exception as e:
+            raise ValueError(f"Failed to parse model output: {text_content}\nError: {e}")
+
+        return result
+
+
+def main():
+    """
+    Entry point: takes image URL as input and prints parsed description.
+    """
+    describer = ImageDescriber()
+    result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_2_20251024_082115_102.jpeg")
+    print("\n✅ Extracted Result:\n")
+    print(json.dumps(result, indent=2))
+
+
+if __name__ == "__main__":
+    main()
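`describe_image` scrapes the first `{` and last `}` out of the reply, which breaks if the model emits a stray brace or no JSON at all. A sketch of a stricter variant, assuming the provider behind the HF router honors the standard OpenAI `response_format` parameter (not guaranteed for every routed backend; if it is ignored, the brace-scanning fallback above still applies):

import json
import os
from openai import OpenAI

client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=os.environ["HF_TOKEN_1"])

def describe_image_strict(image_url: str) -> dict:
    # Ask for a single JSON object up front instead of scraping braces afterwards.
    completion = client.chat.completions.create(
        model="Qwen/Qwen3-VL-8B-Instruct:novita",
        response_format={"type": "json_object"},  # assumption: the routed provider supports this
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": 'Return a JSON object with keys "tags", "description", "caption".'},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }],
    )
    return json.loads(completion.choices[0].message.content)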
cloudzy/ai_utils.py CHANGED
@@ -1,72 +1,73 @@
+import os
 import numpy as np
-from
-import random
+from huggingface_hub import InferenceClient
 
+from dotenv import load_dotenv
+load_dotenv()
 
-""
-
-# Common image tags for demo
-common_tags = [
-    "photo", "image", "landscape", "portrait", "nature", "architecture",
-    "people", "animal", "food", "object", "abstract", "text", "sunset",
-    "mountain", "beach", "forest", "urban", "indoor", "outdoor"
-]
-
-# Select random subset of common tags + filename parts
-tags = list(set(name_parts[:2] + random.sample(common_tags, min(3, len(common_tags)))))
-return tags[:5]  # Return up to 5 tags
-
-    Currently using placeholder logic.
-    """
-caption_templates = [
-    "A beautiful {tag} photograph",
-    "Captured moment: {tag}",
-    "Scenic view of {tag}",
-    "Amazing {tag} scene",
-    "Photography: {tag} collection",
-]
-
-tag = tags[0] if tags else "image"
-template = random.choice(caption_templates)
-return template.format(tag=tag)
-
-def
-embedding = np.random.randn(512).astype(np.float32)
-# Normalize to unit vector
-embedding = embedding / np.linalg.norm(embedding)
-return embedding
-
-"""
+class ImageEmbeddingGenerator:
+    def __init__(self, model_name: str = "intfloat/multilingual-e5-large"):
+        """
+        Initialize the embedding generator with a Hugging Face model.
+        """
+        self.client = InferenceClient(
+            provider="hf-inference",
+            api_key=os.environ["HF_TOKEN_1"],
+        )
+        self.model_name = model_name
+
+    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
+        """
+        Generate a 1024-d embedding for an image using its tags, description, and caption.
+
+        Args:
+            tags: List of tags related to the image
+            description: Long descriptive text of the image
+            caption: Short caption for the image
+
+        Returns:
+            embedding: 1D numpy array of shape (1024,)
+        """
+        # Combine text fields into a single string
+        text = " ".join(tags) + " " + description + " " + caption
+
+        # Request embedding from Hugging Face
+        result = self.client.feature_extraction(
+            text,
+            model=self.model_name,
+        )
+
+        # Convert to numpy array
+        embedding = np.array(result, dtype=np.float32).reshape(-1)
+
+        # Ensure shape is (1024,)
+        if embedding.shape[0] != 1024:
+            raise ValueError(f"Expected embedding of size 1024, got {embedding.shape[0]}")
+
+        return embedding
+
+    def _embed_text(self, text: str) -> np.ndarray:
+        """
+        Internal helper to call Hugging Face feature_extraction and return a numpy array.
+        """
+        result = self.client.feature_extraction(
+            text,
+            model=self.model_name,
+        )
+        embedding = np.array(result, dtype=np.float32).reshape(-1)
+
+        if embedding.shape[0] != 1024:
+            raise ValueError(f"Expected embedding of size 1024, got {embedding.shape[0]}")
+        return embedding
+
+# Example usage:
+if __name__ == "__main__":
+    generator = ImageEmbeddingGenerator()
+
+    tags = ["nature", "sun", "ice cream"]
+    description = "A sunny day in the park with children enjoying ice cream."
+    caption = "Sunny day with ice cream."
+
+    embedding = generator.generate_embedding(tags, description, caption)
+    print("Embedding shape:", embedding.shape)
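One caveat on the model choice: the e5 family is trained with instruction prefixes, and the intfloat/multilingual-e5-large model card recommends embedding documents as "passage: ..." and queries as "query: ...". This commit embeds raw text, which works but tends to retrieve less accurately. A one-function sketch of the convention:

def with_e5_prefix(text: str, is_query: bool = False) -> str:
    # e5 models expect these prefixes; this commit omits them, which is valid
    # but usually costs some retrieval quality.
    return ("query: " if is_query else "passage: ") + text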
cloudzy/routes/search.py CHANGED
@@ -7,7 +7,9 @@ from cloudzy.database import get_session
 from cloudzy.models import Photo
 from cloudzy.schemas import SearchResponse, SearchResult
 from cloudzy.search_engine import SearchEngine
-from cloudzy.ai_utils import generate_filename_embedding
+# from cloudzy.ai_utils import generate_filename_embedding
+from cloudzy.ai_utils import ImageEmbeddingGenerator
+import os
 
 router = APIRouter(tags=["search"])
 
@@ -29,13 +31,17 @@ async def search_photos(
 
     Returns: List of similar photos with distance scores
     """
-
-
+
+    generator = ImageEmbeddingGenerator()
+    query_embedding = generator._embed_text(q)
+
+
 
     # Search in FAISS
     search_engine = SearchEngine()
     search_results = search_engine.search(query_embedding, top_k=top_k)
 
+
     if not search_results:
         return SearchResponse(
             query=q,
@@ -43,6 +49,10 @@ async def search_photos(
         total_results=0,
     )
 
+    APP_DOMAIN = os.getenv("APP_DOMAIN")
+
+
+
     # Fetch photo details from database
     result_objects = []
     for photo_id, distance in search_results:
@@ -54,6 +64,7 @@ async def search_photos(
             SearchResult(
                 photo_id=photo.id,
                 filename=photo.filename,
+                image_url=f"{APP_DOMAIN}uploads/{photo.filename}",
                 tags=photo.get_tags(),
                 caption=photo.caption,
                 distance=distance,
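Two things stand out here: each request constructs a fresh `ImageEmbeddingGenerator`, and the route reaches into the private `_embed_text` helper rather than a public method. Functionally it works. A quick way to exercise the endpoint, assuming it is mounted at `/search` (the decorator sits outside this diff) and that `SearchResponse` lists hits under a `results` field:

import requests

resp = requests.get(
    "https://userx2000-cloudzy-ai-challenge.hf.space/search",
    params={"q": "tiger in a forest", "top_k": 5},
)
for hit in resp.json()["results"]:  # assumption: the response field is named "results"
    print(hit["image_url"], hit["distance"])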
cloudzy/routes/upload.py CHANGED
@@ -8,15 +8,55 @@ from cloudzy.database import get_session
 from cloudzy.models import Photo
 from cloudzy.schemas import UploadResponse
 from cloudzy.utils.file_utils import save_uploaded_file
-from cloudzy.ai_utils import
+from cloudzy.ai_utils import ImageEmbeddingGenerator
 from cloudzy.search_engine import SearchEngine
 
+from cloudzy.agents.image_analyzer import ImageDescriber
+
+
+import os
+
 router = APIRouter(tags=["photos"])
 
 # Allowed image extensions
 ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
 
 
+result = {
+    "tags": [
+        "tiger",
+        "wildlife",
+        "predator",
+        "forest",
+        "golden hour",
+        "nature",
+        "animal",
+        "walking",
+        "orange",
+        "striped"
+    ],
+    "description": "A majestic tiger strides forward with purpose through a dry, golden-hued forest. Its powerful body and distinctive orange-and-black striped coat are clearly visible as it moves along a dirt path. The background is softly blurred, emphasizing the tiger's presence and creating a sense of depth. Warm sunlight bathes the scene, highlighting the texture of its fur and the surrounding dry grass and trees. The tiger's intense gaze is fixed ahead, conveying both power and focus. This image captures the raw beauty and untamed spirit of this apex predator in its natural habitat during what appears to be the golden hour.",
+    "caption": "A tiger walks confidently through a sun-drenched forest at golden hour."
+}
+
+# result = {
+#     "tags": [
+#         "woman",
+#         "photography",
+#         "camera",
+#         "smiling",
+#         "car",
+#         "travel",
+#         "outdoors",
+#         "film",
+#         "plaid",
+#         "window"
+#     ],
+#     "description": "A cheerful woman with long brown hair is leaning out of a car window, holding a vintage-style film camera up to her eye. She's wearing a red, white, and blue plaid shirt and has a bright, joyful smile. The background is softly blurred with green trees and an overcast sky, suggesting a scenic road trip. The warm lighting highlights her face and the leather strap of the camera. The composition captures a candid, adventurous moment of travel and photography.",
+#     "caption": "Smiling woman taking photos from a car window on a scenic road trip."
+# }
+
+
 def validate_image_file(filename: str) -> bool:
     """Check if file has valid image extension"""
     return Path(filename).suffix.lower() in ALLOWED_EXTENSIONS
@@ -57,11 +97,36 @@ async def upload_photo(
     # Save file to disk
     saved_filename = save_uploaded_file(content, file.filename)
     filepath = f"uploads/{saved_filename}"
+
+
+    APP_DOMAIN = os.getenv("APP_DOMAIN")
+
+    image_url = f"{APP_DOMAIN}uploads/{saved_filename}"
+
+    try:
+
+        describer = ImageDescriber()
+        # result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_1_20251024_064435_667.jpg")
+        # result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_2_20251024_082115_102.jpeg")
+        result = describer.describe_image(image_url)
+
+
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
 
     # Generate AI analysis
-    tags =
-    caption =
-
+    tags = result.get("tags", [])
+    caption = result.get("caption", "")
+    description = result.get("description", "")
+
+
+
+    generator = ImageEmbeddingGenerator()
+    embedding = generator.generate_embedding(tags, description, caption)
+
+    # np.save("embedding_2.npy", embedding)
+    # embedding = np.load("embedding_2.npy")
 
     # Create photo record
     photo = Photo(
@@ -70,7 +135,7 @@ async def upload_photo(
         caption=caption,
     )
     photo.set_tags(tags)
-    photo.set_embedding(embedding.tolist())
+    # photo.set_embedding(embedding.tolist())
 
     # Save to database
     session.add(photo)
@@ -84,6 +149,7 @@ async def upload_photo(
     return UploadResponse(
         id=photo.id,
        filename=saved_filename,
+        image_url=image_url,
         tags=tags,
         caption=caption,
         message=f"Photo uploaded successfully with ID {photo.id}"
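Worth noting: `photo.set_embedding(embedding.tolist())` is commented out, so the embedding computed in this handler is never persisted with the photo record, and nothing in these hunks adds it to the FAISS index either. The module-level `result` dict is leftover test data that is shadowed by the `describe_image` call inside the handler. A quick smoke test for the route, assuming it is mounted at `/upload` and reads a multipart `file` field (which matches the `UploadFile` parameter implied here):

import requests

with open("tiger.jpeg", "rb") as f:
    resp = requests.post(
        "https://userx2000-cloudzy-ai-challenge.hf.space/upload",
        files={"file": ("tiger.jpeg", f, "image/jpeg")},
    )
print(resp.json()["image_url"])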
cloudzy/schemas.py CHANGED
@@ -25,6 +25,7 @@ class SearchResult(BaseModel):
     """Search result with similarity score"""
     photo_id: int
     filename: str
+    image_url: str
     tags: List[str]
     caption: str
     distance: float  # L2 distance (lower is more similar)
@@ -44,6 +45,7 @@ class UploadResponse(BaseModel):
     """Response after uploading a photo"""
     id: int
     filename: str
+    image_url: str
     tags: List[str]
     caption: str
     message: str
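With the new field, a serialized `SearchResult` looks roughly like this (illustrative values only):

# Illustrative shape of a serialized SearchResult after this commit
{
    "photo_id": 2,
    "filename": "img_2_20251024_082115_102.jpeg",
    "image_url": "https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_2_20251024_082115_102.jpeg",
    "tags": ["tiger", "wildlife", "golden hour"],
    "caption": "A tiger walks confidently through a sun-drenched forest at golden hour.",
    "distance": 0.37,
}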
cloudzy/search_engine.py CHANGED
@@ -3,12 +3,13 @@ import faiss
 import numpy as np
 from typing import List, Tuple, Optional
 import os
+import pickle
 
 
 class SearchEngine:
     """FAISS-based search engine for image embeddings"""
 
-    def __init__(self, dim: int =
+    def __init__(self, dim: int = 1024, index_path: str = "faiss_index.bin"):
         self.dim = dim
         self.index_path = index_path
         self.id_map: List[int] = []  # Map FAISS indices to photo IDs
@@ -42,39 +43,48 @@ class SearchEngine:
     def search(self, query_embedding: np.ndarray, top_k: int = 5) -> List[Tuple[int, float]]:
         """
         Search for similar embeddings.
 
         Args:
             query_embedding: 1D numpy array of shape (dim,)
             top_k: Number of results to return
 
         Returns:
-            List of (photo_id, distance) tuples
+            List of (photo_id, distance) tuples with distance <= 0.5
         """
+
+        self.load()
+
         if self.index.ntotal == 0:
             return []
 
         # Ensure query is float32 and correct shape
         query_embedding = query_embedding.astype(np.float32).reshape(1, -1)
 
         # Search in FAISS index
         distances, indices = self.index.search(query_embedding, min(top_k, self.index.ntotal))
 
-        # Map back to photo IDs
+        # Map back to photo IDs and filter out distances > 0.5
         results = [
             (self.id_map[int(idx)], float(distance))
             for distance, idx in zip(distances[0], indices[0])
+            if distance <= 0.5
         ]
 
         return results
 
     def save(self) -> None:
-        """Save index to disk"""
+        """Save index and id_map to disk"""
         faiss.write_index(self.index, self.index_path)
+        with open(self.index_path + ".ids", "wb") as f:
+            pickle.dump(self.id_map, f)
 
     def load(self) -> None:
-        """Load index from disk"""
+        """Load index and id_map from disk"""
         if os.path.exists(self.index_path):
             self.index = faiss.read_index(self.index_path)
+        if os.path.exists(self.index_path + ".ids"):
+            with open(self.index_path + ".ids", "rb") as f:
+                self.id_map = pickle.load(f)
 
     def get_stats(self) -> dict:
         """Get index statistics"""
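Persisting `id_map` alongside the index closes a real gap: previously `faiss_index.bin` survived restarts but the mapping back to photo IDs did not. Two smaller observations: `search()` now calls `self.load()` on every query, re-reading the index from disk per request, and if the underlying index is `faiss.IndexFlatL2` (the "L2 distance" comment in schemas.py points that way), FAISS returns squared L2 distances, so for unit-normalized vectors the 0.5 cutoff corresponds to a cosine similarity of at least 0.75. A sketch of the save/load round trip this enables; the `add()` signature is an assumption, since that method sits outside this diff:

import numpy as np
from cloudzy.search_engine import SearchEngine

engine = SearchEngine(dim=1024)
vec = np.random.rand(1024).astype(np.float32)
engine.add(1, vec)   # hypothetical signature for the method that appends to the index and id_map
engine.save()        # writes faiss_index.bin and faiss_index.bin.ids

fresh = SearchEngine(dim=1024)
fresh.load()         # restores both the vectors and the photo-ID mapping
print(fresh.search(vec, top_k=1))  # expected: [(1, 0.0)]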
requirements.txt CHANGED
@@ -9,4 +9,5 @@ faiss-cpu==1.8.0
 python-multipart==0.0.6
 pydantic==2.6.1
 pydantic-settings==2.1.0
-setuptools>=68.0
+setuptools>=68.0
+openai==2.6.0
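One gap to double-check: this commit also imports `python-dotenv` and `huggingface_hub`, but only `openai` is added here. If the Space image does not already provide them, directly or transitively, the file would also need lines along these lines (left unpinned deliberately, since the versions in use are not visible in this diff):

python-dotenv
huggingface_hub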