File size: 4,312 Bytes
1006fab
c6706bd
1006fab
d667f1f
 
c6706bd
1006fab
 
c6706bd
d667f1f
 
1006fab
d667f1f
1006fab
 
 
 
d667f1f
1006fab
 
 
c6706bd
1006fab
 
d667f1f
c6706bd
1006fab
 
 
 
c6706bd
1006fab
d667f1f
1006fab
 
 
 
 
 
 
 
 
 
 
 
 
d667f1f
 
 
 
 
 
 
 
 
1006fab
 
 
c6706bd
1006fab
 
 
d667f1f
1006fab
 
 
 
 
 
c6706bd
d667f1f
 
 
 
 
 
 
 
1006fab
c6706bd
8ad42f5
d667f1f
 
8ad42f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006fab
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import numpy as np
from huggingface_hub import InferenceClient
from typing import List, Dict, Tuple
import re

from dotenv import load_dotenv
load_dotenv()



class ImageEmbeddingGenerator:
    """Generate L2-normalized text embeddings for images via the Hugging Face Inference API."""

    def __init__(self, model_name: str = "Qwen/Qwen3-Embedding-8B", embedding_dim: int = 4096):
        """
        Initialize the embedding generator with a Hugging Face model.

        Args:
            model_name: Hugging Face model id passed to feature_extraction.
            embedding_dim: Expected dimensionality of the returned embedding
                (defaults to 4096, the previous hard-coded value).

        Raises:
            KeyError: If the HF_TOKEN_1 environment variable is not set.
        """
        self.client = InferenceClient(
            provider="nebius",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
        self.embedding_dim = embedding_dim

    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
        """
        Generate an embedding for an image using its tags, description, and caption.

        Args:
            tags: List of tags related to the image
            description: Long descriptive text of the image
            caption: Short caption for the image

        Returns:
            embedding: 1D numpy array of shape (embedding_dim,), normalized to unit length

        Raises:
            ValueError: If the API returns an embedding of unexpected size.
        """
        # Combine text fields into a single string
        text = " ".join(tags) + " " + description + " " + caption

        # Delegate to the shared helper so the fetch/validate/normalize logic
        # lives in exactly one place (it was previously duplicated here).
        return self._embed_text(text)

    def _embed_text(self, text: str) -> np.ndarray:
        """
        Internal helper to call Hugging Face feature_extraction and return a numpy array.
        Embeddings are normalized to unit length for consistent distance calculations.

        Raises:
            ValueError: If the API returns an embedding of unexpected size.
        """
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )
        # Flatten whatever nesting the API returns (e.g. [[...]]) to 1-D.
        embedding = np.array(result, dtype=np.float32).reshape(-1)

        if embedding.shape[0] != self.embedding_dim:
            raise ValueError(
                f"Expected embedding of size {self.embedding_dim}, got {embedding.shape[0]}"
            )

        # Normalize to unit length (L2 normalization)
        # This ensures distances stay consistent across models and dimensions
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding




class TextSummarizer:
    """Summarize free text with a Hugging Face summarization model, with graceful fallbacks."""

    def __init__(self, model_name: str = "facebook/bart-large-cnn"):
        """
        Initialize the text summarizer with a Hugging Face model.

        Args:
            model_name: Hugging Face model id passed to the summarization endpoint.

        Raises:
            KeyError: If the HF_TOKEN_1 environment variable is not set.
        """
        self.client = InferenceClient(
            provider="hf-inference",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name

    def summarize(self, text: str) -> str:
        """
        Generate a summary of the given text.

        Args:
            text: Text to summarize

        Returns:
            summary: Generated summary string. Returns a fixed default for
            empty input and a truncated prefix if the API call fails.
        """
        # Guard: nothing to summarize -> fixed default album title.
        if not text or text.strip() == "":
            return "Album of photos"

        try:
            result = self.client.summarization(
                text,
                model=self.model_name,
            )
            # The endpoint's return shape varies across huggingface_hub
            # versions: current versions return a SummarizationOutput
            # dataclass with a `summary_text` attribute, while raw API
            # responses may be a dict or a list of dicts. Previously only
            # the dict shapes were handled, so dataclass results fell
            # through to str(result) and returned a repr.
            if isinstance(result, list) and len(result) > 0:
                first = result[0]
                if isinstance(first, dict):
                    return first.get("summary_text", str(first))
                return getattr(first, "summary_text", str(first))
            elif isinstance(result, dict):
                return result.get("summary_text", str(result))
            else:
                return getattr(result, "summary_text", str(result))
        except Exception:
            # Best-effort fallback: degrade to a truncated prefix rather
            # than propagating a transient API failure to the caller.
            return f"Collection: {text[:80]}..."

# Example usage:
if __name__ == "__main__":
    # Demo inputs: a handful of tags plus a long and a short text field.
    demo_tags = ["nature", "sun", "ice cream"]
    demo_description = "A sunny day in the park with children enjoying ice cream."
    demo_caption = "Sunny day with ice cream."

    # Build the generator and embed the combined metadata in one pass.
    vector = ImageEmbeddingGenerator().generate_embedding(
        demo_tags, demo_description, demo_caption
    )
    print("Embedding shape:", vector.shape)