matinsn2000 committed on
Commit
fa7dceb
·
1 Parent(s): 1c23b55

Used smolagents for describing image

Browse files
cloudzy/agents/similar_image_retriever.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import CodeAgent, OpenAIServerModel
2
+ from pathlib import Path
3
+ from PIL import Image
4
+ from dotenv import load_dotenv
5
+ import os
6
+
7
+ load_dotenv()
8
+
9
+
10
class ImageAnalyzerAgent:
    """Describe images with a smolagents ``CodeAgent`` backed by Gemini.

    Gemini is reached through its OpenAI-compatible endpoint, which is why the
    generic ``OpenAIServerModel`` wrapper is used.
    """

    def __init__(self):
        """Build the Gemini-backed model and the agent that drives it.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` is unset or empty in the environment.
        """
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables. Get one at https://aistudio.google.com/apikey")

        # Use Gemini with smolagents via the OpenAI-compatible API.
        self.model = OpenAIServerModel(
            model_id="gemini-2.0-flash",
            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=api_key
        )

        # No extra tools are registered: the agent only needs the model itself.
        self.agent = CodeAgent(
            tools=[],
            model=self.model,
            max_steps=20,
            verbosity_level=2
        )

    def analyze_images(self, image_paths):
        """Load the images that exist on disk and ask the agent to describe them.

        Args:
            image_paths: Iterable of ``Path`` objects or strings pointing to
                image files. Paths that do not exist are skipped and reported.

        Returns:
            The agent's response, or ``None`` when none of the paths exist.
        """
        images = []
        missing = []
        for raw_path in image_paths:
            img_path = Path(raw_path) if isinstance(raw_path, str) else raw_path
            if not img_path.exists():
                missing.append(img_path)
                continue
            # Image.open() is lazy and keeps the underlying file open. Copy the
            # pixel data into memory and let the context manager close the file,
            # so no handle outlives this loop iteration.
            with Image.open(img_path) as img:
                images.append(img.copy())

        if missing:
            # Surface skipped inputs instead of dropping them silently.
            print(f"Skipped {len(missing)} missing path(s): {', '.join(str(p) for p in missing)}")

        print(f"Loaded {len(images)} images from provided paths")

        if not images:
            print("No images found. Please provide valid image paths.")
            return None

        response = self.agent.run(
            """
            Describe these images to me:
            """,
            images=images
        )

        print("\n=== Agent Response ===")
        print(response)
        return response
67
+
68
+
69
# Manual smoke test: describe a handful of sample uploads.
if __name__ == "__main__":
    # The uploads/ directory sits three levels above this file.
    uploads_dir = Path(__file__).parent.parent.parent / "uploads"
    sample_file_names = (
        "img_1_20251024_180707_942.jpg",
        "img_2_20251024_180749_372.jpeg",
        "img_3_20251024_180756_356.jpeg",
    )
    sample_image_paths = [uploads_dir / file_name for file_name in sample_file_names]

    ImageAnalyzerAgent().analyze_images(sample_image_paths)
requirements.txt CHANGED
@@ -12,4 +12,6 @@ pydantic-settings==2.1.0
12
  setuptools>=68.0
13
  openai==2.6.0
14
  huggingface_hub
15
- decorator==5.1.1
 
 
 
12
  setuptools>=68.0
13
  openai==2.6.0
14
  huggingface_hub
15
+ decorator==5.1.1
16
+ smolagents[toolkit]
17
+ google-generativeai