File size: 2,538 Bytes
fa7dceb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from smolagents import CodeAgent, OpenAIServerModel
from pathlib import Path
from PIL import Image
from dotenv import load_dotenv
import os

# Run python-dotenv at import time so that values defined in a .env file
# (presumably including GEMINI_API_KEY — confirm against the deployment setup)
# are visible to the os.getenv() lookup performed when the agent is built.
load_dotenv()


class ImageAnalyzerAgent:
    """Describe images with a Gemini model driven through a smolagents agent.

    The model is reached through Gemini's OpenAI-compatible endpoint, which is
    why the generic ``OpenAIServerModel`` wrapper is used.

    Instance attributes:
        model: The ``OpenAIServerModel`` configured for ``gemini-2.0-flash``.
        agent: The ``CodeAgent`` (no extra tools) that runs the prompt.
    """

    def __init__(self) -> None:
        """Build the model wrapper and the agent from the environment.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables. Get one at https://aistudio.google.com/apikey")

        # Use Gemini with smolagents via the OpenAI-compatible API
        self.model = OpenAIServerModel(
            model_id="gemini-2.0-flash",
            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=api_key
        )

        # The agent gets no additional tools; it only needs to look at the
        # images handed to run().
        self.agent = CodeAgent(
            tools=[],
            model=self.model,
            max_steps=20,
            verbosity_level=2
        )

    @staticmethod
    def _load_image(image_path):
        """Read one image fully into memory and release its file handle."""
        # Image.open() is lazy and keeps the underlying file open. Forcing the
        # pixel data to load inside the context manager lets the handle be
        # closed on exit while the decoded image stays usable afterwards.
        with Image.open(image_path) as img:
            img.load()
        return img

    def analyze_images(self, image_paths):
        """Load images from disk and ask the agent to describe them.

        Args:
            image_paths: Iterable of image locations given as strings,
                ``pathlib.Path`` objects or any other ``os.PathLike``. A single
                path (not wrapped in a list) is accepted as well, and ``None``
                is treated like an empty list.

        Returns:
            Whatever ``self.agent.run`` returns for the prompt, or ``None``
            when none of the given paths points to an existing file.

        Raises:
            Exception: Errors raised by Pillow while opening or decoding a
                file that exists but is not a readable image are propagated.
        """
        if image_paths is None:
            image_paths = []
        elif isinstance(image_paths, (str, os.PathLike)):
            # A lone path would otherwise be iterated character by character.
            image_paths = [image_paths]

        # Split the inputs into regular files and everything else (missing
        # paths, directories) so the latter can be reported instead of being
        # dropped silently or crashing inside Image.open().
        found, skipped = [], []
        for candidate in map(Path, image_paths):
            (found if candidate.is_file() else skipped).append(candidate)

        if skipped:
            print(
                f"Skipping {len(skipped)} path(s) that are not existing files: "
                + ", ".join(str(path) for path in skipped)
            )

        images = [self._load_image(path) for path in found]

        print(f"Loaded {len(images)} images from provided paths")

        if not images:
            print("No images found. Please provide valid image paths.")
            return None

        response = self.agent.run(
            """
            Describe these images to me:
            """,
            images=images
        )

        print("\n=== Agent Response ===")
        print(response)
        return response


# Manual smoke test: describe a few sample files from the uploads folder
if __name__ == "__main__":
    # The uploads folder sits three directory levels above this file.
    base_dir = Path(__file__).parent.parent.parent
    uploads_dir = base_dir / "uploads"

    sample_names = (
        "img_1_20251024_180707_942.jpg",
        "img_2_20251024_180749_372.jpeg",
        "img_3_20251024_180756_356.jpeg",
    )
    sample_image_paths = [uploads_dir / file_name for file_name in sample_names]

    analyzer = ImageAnalyzerAgent()
    analyzer.analyze_images(sample_image_paths)