Spaces:

userx2000
/

cloudzy_ai_challenge

Sleeping

App Files Files Community

matinsn2000 committed on Oct 25

Commit

fa7dceb

1 Parent(s): 1c23b55

Used smolagents for describing image

Browse files

Files changed (2) hide show

cloudzy/agents/similar_image_retriever.py +79 -0
requirements.txt +3 -1

cloudzy/agents/similar_image_retriever.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from smolagents import CodeAgent, OpenAIServerModel
+from pathlib import Path
+from PIL import Image
+from dotenv import load_dotenv
+import os
+load_dotenv()
+class ImageAnalyzerAgent:
+    """Agent that asks a Gemini model, driven through smolagents, to describe images."""
+    def __init__(self):
+        """
+        Build the model client and the agent.
+        Raises:
+            ValueError: If the GEMINI_API_KEY environment variable is unset or empty.
+        """
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            raise ValueError("GEMINI_API_KEY not found in environment variables. Get one at https://aistudio.google.com/apikey")
+        # Gemini is reached through its OpenAI-compatible endpoint, which is why
+        # the generic OpenAIServerModel wrapper is used here.
+        self.model = OpenAIServerModel(
+            model_id="gemini-2.0-flash",
+            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
+            api_key=api_key
+        )
+        # No tools are registered; the agent only receives the prompt and the images.
+        self.agent = CodeAgent(
+            tools=[],
+            model=self.model,
+            max_steps=20,
+            verbosity_level=2
+        )
+    def analyze_images(self, image_paths):
+        """
+        Load images from file paths and ask the agent to describe them.
+        Args:
+            image_paths: Iterable of Path objects or strings pointing to image files.
+                Paths that do not exist, or that cannot be opened as images, are skipped.
+        Returns:
+            Whatever the agent's run() call returns, or None when no image could be loaded.
+        """
+        images = []
+        for raw_path in image_paths:
+            # Strings are converted; anything else is used as given.
+            img_path = Path(raw_path) if isinstance(raw_path, str) else raw_path
+            if not img_path.exists():
+                continue
+            try:
+                # Image.open() is lazy and keeps the file handle open. Read the
+                # pixel data now so the context manager can release the handle
+                # while the in-memory image stays usable afterwards.
+                with Image.open(img_path) as img:
+                    img.load()
+            except OSError as exc:
+                # Covers directories, permission problems and files Pillow cannot
+                # identify; one bad file should not abort the whole batch.
+                print(f"Skipping unreadable image {img_path}: {exc}")
+                continue
+            images.append(img)
+        print(f"Loaded {len(images)} images from provided paths")
+        if not images:
+            print("No images found. Please provide valid image paths.")
+            return None
+        response = self.agent.run(
+            """
+            Describe these images to me:
+            """,
+            images=images
+        )
+        print("\n=== Agent Response ===")
+        print(response)
+        return response
+# Manual smoke test: describe a fixed set of sample uploads when run as a script
+if __name__ == "__main__":
+    # File names of the sample images expected in the uploads directory.
+    sample_names = (
+        "img_1_20251024_180707_942.jpg",
+        "img_2_20251024_180749_372.jpeg",
+        "img_3_20251024_180756_356.jpeg",
+    )
+    # The uploads directory sits three levels above this file.
+    project_root = Path(__file__).parent.parent.parent
+    uploads_dir = project_root / "uploads"
+    analyzer = ImageAnalyzerAgent()
+    analyzer.analyze_images([uploads_dir / name for name in sample_names])

requirements.txt CHANGED Viewed

@@ -12,4 +12,6 @@ pydantic-settings==2.1.0
 setuptools>=68.0
 openai==2.6.0
 huggingface_hub
-decorator==5.1.1

 setuptools>=68.0
 openai==2.6.0
 huggingface_hub
+decorator==5.1.1
+smolagents[toolkit]
+google-generativeai