Commit 1006fab
Parent: 6365287
Added embedding

Files changed:
- .gitignore +3 -0
- app.py +2 -2
- cloudzy/agents/image_analyzer.py +91 -0
- cloudzy/ai_utils.py +64 -63
- cloudzy/routes/search.py +14 -3
- cloudzy/routes/upload.py +71 -5
- cloudzy/schemas.py +2 -0
- cloudzy/search_engine.py +23 -13
- requirements.txt +2 -1
.gitignore CHANGED
@@ -45,6 +45,9 @@ faiss_index.bin
 *.log
 logs/
 
+*.npy
+*.ids
+
 # Testing
 .pytest_cache/
 .coverage
app.py CHANGED
@@ -3,7 +3,7 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
 from fastapi.staticfiles import StaticFiles
-
+from dotenv import load_dotenv
 
 from cloudzy.database import create_db_and_tables
 from cloudzy.routes import upload, photo, search
@@ -12,7 +12,7 @@ import os
 
 # Initialize search engine at startup
 search_engine = None
-
+load_dotenv()
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
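With `load_dotenv()` called at import time, `HF_TOKEN_1` and `APP_DOMAIN` can be supplied from a local `.env` file. A minimal sketch of that file (the variable names come from this commit; the values are placeholders). Note that the routes added below build URLs as f"{APP_DOMAIN}uploads/...", with no separator inserted, so the value needs a trailing slash:

# .env (placeholder values)
HF_TOKEN_1=hf_xxxxxxxxxxxxxxxx
APP_DOMAIN=https://userx2000-cloudzy-ai-challenge.hf.space/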
cloudzy/agents/image_analyzer.py ADDED
@@ -0,0 +1,91 @@
+import os
+import json
+from openai import OpenAI
+
+
+from dotenv import load_dotenv
+load_dotenv()
+
+class ImageDescriber:
+    """
+    Class for generating descriptive metadata (tags, description, caption)
+    for an image using Hugging Face's inference endpoint via OpenAI client.
+    """
+
+    def __init__(self):
+        # Read token from environment variable
+        api_key = os.getenv("HF_TOKEN_1")
+        if not api_key:
+            raise ValueError("Environment variable HF_TOKEN_1 is not set.")
+
+        # Initialize client
+        self.client = OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=api_key,
+        )
+
+        # Model to use
+        self.model = "Qwen/Qwen3-VL-8B-Instruct:novita"
+
+    def describe_image(self, image_url: str) -> dict:
+        """
+        Sends the image to the model and returns a structured dictionary:
+        {
+            "tags": [...],
+            "description": "...",
+            "caption": "..."
+        }
+        """
+        # Prompt for structured output
+        prompt = """
+        Describe this image in the following exact format:
+
+        result: {
+            "tags": [list of tags related to the image],
+            "description": "a 10-line descriptive description for the image",
+            "caption": "a short description for the image"
+        }
+        """
+
+        # Send request
+        completion = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": image_url}},
+                    ],
+                }
+            ],
+        )
+
+        # Extract message text
+        message = completion.choices[0].message
+        text_content = message.content.strip()
+
+        # Try to extract JSON-like dict from model output
+        try:
+            start = text_content.index("{")
+            end = text_content.rindex("}") + 1
+            json_str = text_content[start:end]
+            result = json.loads(json_str)
+        except Exception as e:
+            raise ValueError(f"Failed to parse model output: {text_content}\nError: {e}")
+
+        return result
+
+
+def main():
+    """
+    Entry point: takes image URL as input and prints parsed description.
+    """
+    describer = ImageDescriber()
+    result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_2_20251024_082115_102.jpeg")
+    print("\n✅ Extracted Result:\n")
+    print(json.dumps(result, indent=2))
+
+
+if __name__ == "__main__":
+    main()
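`describe_image` scrapes the first `{` and last `}` out of the reply, which breaks if the model emits a stray brace or no JSON at all. A sketch of a stricter variant, assuming the provider behind the HF router honors the standard OpenAI `response_format` parameter (not guaranteed for every routed backend; if it is ignored, the brace-scanning fallback above still applies):

import json
import os
from openai import OpenAI

client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=os.environ["HF_TOKEN_1"])

def describe_image_strict(image_url: str) -> dict:
    # Ask for a single JSON object up front instead of scraping braces afterwards.
    completion = client.chat.completions.create(
        model="Qwen/Qwen3-VL-8B-Instruct:novita",
        response_format={"type": "json_object"},  # assumption: the routed provider supports this
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": 'Return a JSON object with keys "tags", "description", "caption".'},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }],
    )
    return json.loads(completion.choices[0].message.content)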
cloudzy/ai_utils.py CHANGED
@@ -1,72 +1,73 @@
+import os
 import numpy as np
-from
-import random
+from huggingface_hub import InferenceClient
 
+from dotenv import load_dotenv
+load_dotenv()
 
-""
-
-# Common image tags for demo
-common_tags = [
-    "photo", "image", "landscape", "portrait", "nature", "architecture",
-    "people", "animal", "food", "object", "abstract", "text", "sunset",
-    "mountain", "beach", "forest", "urban", "indoor", "outdoor"
-]
-
-# Select random subset of common tags + filename parts
-tags = list(set(name_parts[:2] + random.sample(common_tags, min(3, len(common_tags)))))
-return tags[:5]  # Return up to 5 tags
-
-    Currently using placeholder logic.
-    """
-caption_templates = [
-    "A beautiful {tag} photograph",
-    "Captured moment: {tag}",
-    "Scenic view of {tag}",
-    "Amazing {tag} scene",
-    "Photography: {tag} collection",
-]
-
-tag = tags[0] if tags else "image"
-template = random.choice(caption_templates)
-return template.format(tag=tag)
-
-def
-embedding = np.random.randn(512).astype(np.float32)
-# Normalize to unit vector
-embedding = embedding / np.linalg.norm(embedding)
-return embedding
-
-"""
+class ImageEmbeddingGenerator:
+    def __init__(self, model_name: str = "intfloat/multilingual-e5-large"):
+        """
+        Initialize the embedding generator with a Hugging Face model.
+        """
+        self.client = InferenceClient(
+            provider="hf-inference",
+            api_key=os.environ["HF_TOKEN_1"],
+        )
+        self.model_name = model_name
+
+    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
+        """
+        Generate a 1024-d embedding for an image using its tags, description, and caption.
+
+        Args:
+            tags: List of tags related to the image
+            description: Long descriptive text of the image
+            caption: Short caption for the image
+
+        Returns:
+            embedding: 1D numpy array of shape (1024,)
+        """
+        # Combine text fields into a single string
+        text = " ".join(tags) + " " + description + " " + caption
+
+        # Request embedding from Hugging Face
+        result = self.client.feature_extraction(
+            text,
+            model=self.model_name,
+        )
+
+        # Convert to numpy array
+        embedding = np.array(result, dtype=np.float32).reshape(-1)
+
+        # Ensure shape is (1024,)
+        if embedding.shape[0] != 1024:
+            raise ValueError(f"Expected embedding of size 1024, got {embedding.shape[0]}")
+
+        return embedding
+
+    def _embed_text(self, text: str) -> np.ndarray:
+        """
+        Internal helper to call Hugging Face feature_extraction and return a numpy array.
+        """
+        result = self.client.feature_extraction(
+            text,
+            model=self.model_name,
+        )
+        embedding = np.array(result, dtype=np.float32).reshape(-1)
+
+        if embedding.shape[0] != 1024:
+            raise ValueError(f"Expected embedding of size 1024, got {embedding.shape[0]}")
+        return embedding
+
+# Example usage:
+if __name__ == "__main__":
+    generator = ImageEmbeddingGenerator()
+
+    tags = ["nature", "sun", "ice cream"]
+    description = "A sunny day in the park with children enjoying ice cream."
+    caption = "Sunny day with ice cream."
+
+    embedding = generator.generate_embedding(tags, description, caption)
+    print("Embedding shape:", embedding.shape)
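One caveat on the model choice: the e5 family is trained with instruction prefixes, and the intfloat/multilingual-e5-large model card recommends embedding documents as "passage: ..." and queries as "query: ...". This commit embeds raw text, which works but tends to retrieve less accurately. A one-function sketch of the convention:

def with_e5_prefix(text: str, is_query: bool = False) -> str:
    # e5 models expect these prefixes; this commit omits them, which is valid
    # but usually costs some retrieval quality.
    return ("query: " if is_query else "passage: ") + text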
cloudzy/routes/search.py CHANGED
@@ -7,7 +7,9 @@ from cloudzy.database import get_session
 from cloudzy.models import Photo
 from cloudzy.schemas import SearchResponse, SearchResult
 from cloudzy.search_engine import SearchEngine
-from cloudzy.ai_utils import generate_filename_embedding
+# from cloudzy.ai_utils import generate_filename_embedding
+from cloudzy.ai_utils import ImageEmbeddingGenerator
+import os
 
 router = APIRouter(tags=["search"])
 
@@ -29,13 +31,17 @@ async def search_photos(
 
     Returns: List of similar photos with distance scores
     """
-
-
+
+    generator = ImageEmbeddingGenerator()
+    query_embedding = generator._embed_text(q)
+
+
 
     # Search in FAISS
     search_engine = SearchEngine()
     search_results = search_engine.search(query_embedding, top_k=top_k)
 
+
     if not search_results:
         return SearchResponse(
             query=q,
@@ -43,6 +49,10 @@ async def search_photos(
         total_results=0,
     )
 
+    APP_DOMAIN = os.getenv("APP_DOMAIN")
+
+
+
     # Fetch photo details from database
     result_objects = []
     for photo_id, distance in search_results:
@@ -54,6 +64,7 @@ async def search_photos(
             SearchResult(
                 photo_id=photo.id,
                 filename=photo.filename,
+                image_url=f"{APP_DOMAIN}uploads/{photo.filename}",
                 tags=photo.get_tags(),
                 caption=photo.caption,
                 distance=distance,
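Two things stand out here: each request constructs a fresh `ImageEmbeddingGenerator`, and the route reaches into the private `_embed_text` helper rather than a public method. Functionally it works. A quick way to exercise the endpoint, assuming it is mounted at `/search` (the decorator sits outside this diff) and that `SearchResponse` lists hits under a `results` field:

import requests

resp = requests.get(
    "https://userx2000-cloudzy-ai-challenge.hf.space/search",
    params={"q": "tiger in a forest", "top_k": 5},
)
for hit in resp.json()["results"]:  # assumption: the response field is named "results"
    print(hit["image_url"], hit["distance"])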
cloudzy/routes/upload.py CHANGED
@@ -8,15 +8,55 @@ from cloudzy.database import get_session
 from cloudzy.models import Photo
 from cloudzy.schemas import UploadResponse
 from cloudzy.utils.file_utils import save_uploaded_file
-from cloudzy.ai_utils import
+from cloudzy.ai_utils import ImageEmbeddingGenerator
 from cloudzy.search_engine import SearchEngine
 
+from cloudzy.agents.image_analyzer import ImageDescriber
+
+
+import os
+
 router = APIRouter(tags=["photos"])
 
 # Allowed image extensions
 ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
 
 
+result = {
+    "tags": [
+        "tiger",
+        "wildlife",
+        "predator",
+        "forest",
+        "golden hour",
+        "nature",
+        "animal",
+        "walking",
+        "orange",
+        "striped"
+    ],
+    "description": "A majestic tiger strides forward with purpose through a dry, golden-hued forest. Its powerful body and distinctive orange-and-black striped coat are clearly visible as it moves along a dirt path. The background is softly blurred, emphasizing the tiger's presence and creating a sense of depth. Warm sunlight bathes the scene, highlighting the texture of its fur and the surrounding dry grass and trees. The tiger's intense gaze is fixed ahead, conveying both power and focus. This image captures the raw beauty and untamed spirit of this apex predator in its natural habitat during what appears to be the golden hour.",
+    "caption": "A tiger walks confidently through a sun-drenched forest at golden hour."
+}
+
+# result = {
+#     "tags": [
+#         "woman",
+#         "photography",
+#         "camera",
+#         "smiling",
+#         "car",
+#         "travel",
+#         "outdoors",
+#         "film",
+#         "plaid",
+#         "window"
+#     ],
+#     "description": "A cheerful woman with long brown hair is leaning out of a car window, holding a vintage-style film camera up to her eye. She's wearing a red, white, and blue plaid shirt and has a bright, joyful smile. The background is softly blurred with green trees and an overcast sky, suggesting a scenic road trip. The warm lighting highlights her face and the leather strap of the camera. The composition captures a candid, adventurous moment of travel and photography.",
+#     "caption": "Smiling woman taking photos from a car window on a scenic road trip."
+# }
+
+
 def validate_image_file(filename: str) -> bool:
     """Check if file has valid image extension"""
     return Path(filename).suffix.lower() in ALLOWED_EXTENSIONS
@@ -57,11 +97,36 @@ async def upload_photo(
     # Save file to disk
     saved_filename = save_uploaded_file(content, file.filename)
     filepath = f"uploads/{saved_filename}"
+
+
+    APP_DOMAIN = os.getenv("APP_DOMAIN")
+
+    image_url = f"{APP_DOMAIN}uploads/{saved_filename}"
+
+    try:
+
+        describer = ImageDescriber()
+        # result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_1_20251024_064435_667.jpg")
+        # result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_2_20251024_082115_102.jpeg")
+        result = describer.describe_image(image_url)
+
+
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
 
     # Generate AI analysis
-    tags =
-    caption =
-
+    tags = result.get("tags", [])
+    caption = result.get("caption", "")
+    description = result.get("description", "")
+
+
+
+    generator = ImageEmbeddingGenerator()
+    embedding = generator.generate_embedding(tags, description, caption)
+
+    # np.save("embedding_2.npy", embedding)
+    # embedding = np.load("embedding_2.npy")
 
     # Create photo record
     photo = Photo(
@@ -70,7 +135,7 @@ async def upload_photo(
         caption=caption,
     )
     photo.set_tags(tags)
-    photo.set_embedding(embedding.tolist())
+    # photo.set_embedding(embedding.tolist())
 
     # Save to database
     session.add(photo)
@@ -84,6 +149,7 @@ async def upload_photo(
     return UploadResponse(
         id=photo.id,
        filename=saved_filename,
+        image_url=image_url,
         tags=tags,
         caption=caption,
         message=f"Photo uploaded successfully with ID {photo.id}"
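Worth noting: `photo.set_embedding(embedding.tolist())` is commented out, so the embedding computed in this handler is never persisted with the photo record, and nothing in these hunks adds it to the FAISS index either. The module-level `result` dict is leftover test data that is shadowed by the `describe_image` call inside the handler. A quick smoke test for the route, assuming it is mounted at `/upload` and reads a multipart `file` field (which matches the `UploadFile` parameter implied here):

import requests

with open("tiger.jpeg", "rb") as f:
    resp = requests.post(
        "https://userx2000-cloudzy-ai-challenge.hf.space/upload",
        files={"file": ("tiger.jpeg", f, "image/jpeg")},
    )
print(resp.json()["image_url"])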
cloudzy/schemas.py CHANGED
@@ -25,6 +25,7 @@ class SearchResult(BaseModel):
     """Search result with similarity score"""
     photo_id: int
     filename: str
+    image_url: str
     tags: List[str]
     caption: str
     distance: float  # L2 distance (lower is more similar)
@@ -44,6 +45,7 @@ class UploadResponse(BaseModel):
     """Response after uploading a photo"""
     id: int
     filename: str
+    image_url: str
     tags: List[str]
     caption: str
     message: str
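With the new field, a serialized `SearchResult` looks roughly like this (illustrative values only):

# Illustrative shape of a serialized SearchResult after this commit
{
    "photo_id": 2,
    "filename": "img_2_20251024_082115_102.jpeg",
    "image_url": "https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_2_20251024_082115_102.jpeg",
    "tags": ["tiger", "wildlife", "golden hour"],
    "caption": "A tiger walks confidently through a sun-drenched forest at golden hour.",
    "distance": 0.37,
}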
cloudzy/search_engine.py CHANGED
@@ -3,12 +3,13 @@ import faiss
 import numpy as np
 from typing import List, Tuple, Optional
 import os
+import pickle
 
 
 class SearchEngine:
     """FAISS-based search engine for image embeddings"""
 
-    def __init__(self, dim: int =
+    def __init__(self, dim: int = 1024, index_path: str = "faiss_index.bin"):
         self.dim = dim
         self.index_path = index_path
         self.id_map: List[int] = []  # Map FAISS indices to photo IDs
@@ -42,39 +43,48 @@ class SearchEngine:
     def search(self, query_embedding: np.ndarray, top_k: int = 5) -> List[Tuple[int, float]]:
         """
         Search for similar embeddings.
 
         Args:
             query_embedding: 1D numpy array of shape (dim,)
             top_k: Number of results to return
 
         Returns:
-            List of (photo_id, distance) tuples
+            List of (photo_id, distance) tuples with distance <= 0.5
         """
+
+        self.load()
+
         if self.index.ntotal == 0:
             return []
 
         # Ensure query is float32 and correct shape
         query_embedding = query_embedding.astype(np.float32).reshape(1, -1)
 
         # Search in FAISS index
         distances, indices = self.index.search(query_embedding, min(top_k, self.index.ntotal))
 
-        # Map back to photo IDs
+        # Map back to photo IDs and filter out distances > 0.5
         results = [
             (self.id_map[int(idx)], float(distance))
             for distance, idx in zip(distances[0], indices[0])
+            if distance <= 0.5
         ]
 
         return results
 
     def save(self) -> None:
-        """Save index to disk"""
+        """Save index and id_map to disk"""
         faiss.write_index(self.index, self.index_path)
+        with open(self.index_path + ".ids", "wb") as f:
+            pickle.dump(self.id_map, f)
 
     def load(self) -> None:
-        """Load index from disk"""
+        """Load index and id_map from disk"""
         if os.path.exists(self.index_path):
             self.index = faiss.read_index(self.index_path)
+        if os.path.exists(self.index_path + ".ids"):
+            with open(self.index_path + ".ids", "rb") as f:
+                self.id_map = pickle.load(f)
 
     def get_stats(self) -> dict:
         """Get index statistics"""
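Persisting `id_map` alongside the index closes a real gap: previously `faiss_index.bin` survived restarts but the mapping back to photo IDs did not. Two smaller observations: `search()` now calls `self.load()` on every query, re-reading the index from disk per request, and if the underlying index is `faiss.IndexFlatL2` (the "L2 distance" comment in schemas.py points that way), FAISS returns squared L2 distances, so for unit-normalized vectors the 0.5 cutoff corresponds to a cosine similarity of at least 0.75. A sketch of the save/load round trip this enables; the `add()` signature is an assumption, since that method sits outside this diff:

import numpy as np
from cloudzy.search_engine import SearchEngine

engine = SearchEngine(dim=1024)
vec = np.random.rand(1024).astype(np.float32)
engine.add(1, vec)   # hypothetical signature for the method that appends to the index and id_map
engine.save()        # writes faiss_index.bin and faiss_index.bin.ids

fresh = SearchEngine(dim=1024)
fresh.load()         # restores both the vectors and the photo-ID mapping
print(fresh.search(vec, top_k=1))  # expected: [(1, 0.0)]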
requirements.txt CHANGED
@@ -9,4 +9,5 @@ faiss-cpu==1.8.0
 python-multipart==0.0.6
 pydantic==2.6.1
 pydantic-settings==2.1.0
-setuptools>=68.0
+setuptools>=68.0
+openai==2.6.0
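One gap to double-check: this commit also imports `python-dotenv` and `huggingface_hub`, but only `openai` is added here. If the Space image does not already provide them, directly or transitively, the file would also need lines along these lines (left unpinned deliberately, since the versions in use are not visible in this diff):

python-dotenv
huggingface_hub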