import os
import numpy as np
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
load_dotenv()
class ImageEmbeddingGenerator:
    def __init__(self, model_name: str = "Qwen/Qwen3-Embedding-8B"):
        """
        Initialize the embedding generator with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="nebius",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
        """
        Generate a 4096-d embedding for an image from its tags, description, and caption.

        Args:
            tags: List of tags related to the image
            description: Long descriptive text of the image
            caption: Short caption for the image

        Returns:
            embedding: 1D numpy array of shape (4096,), normalized to unit length
        """
        # Combine the text fields into a single string and embed it
        text = " ".join(tags) + " " + description + " " + caption
        return self._embed_text(text)
    def _embed_text(self, text: str) -> np.ndarray:
        """
        Internal helper: call Hugging Face feature_extraction and return a numpy array.
        Embeddings are L2-normalized so that distance calculations stay consistent
        across models and dimensions.
        """
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )
        embedding = np.array(result, dtype=np.float32).reshape(-1)
        # Ensure the model returned the expected dimensionality
        if embedding.shape[0] != 4096:
            raise ValueError(f"Expected embedding of size 4096, got {embedding.shape[0]}")
        # Normalize to unit length (L2 normalization)
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm
        return embedding
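
# Because _embed_text returns unit-length vectors, cosine similarity between two
# embeddings reduces to a plain dot product. Minimal sketch; this helper is
# illustrative only and not part of the original class:
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity of two L2-normalized embeddings (equals their dot product)."""
    return float(np.dot(a, b))
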
class TextSummarizer:
    def __init__(self, model_name: str = "facebook/bart-large-cnn"):
        """
        Initialize the text summarizer with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="hf-inference",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
    def summarize(self, text: str) -> str:
        """
        Generate a summary of the given text.

        Args:
            text: Text to summarize

        Returns:
            summary: Generated summary string
        """
        if not text or text.strip() == "":
            return "Album of photos"
        try:
            result = self.client.summarization(
                text,
                model=self.model_name,
            )
            # Extract the summary text from the result, which may be a
            # SummarizationOutput dataclass, a dict, or a list depending on
            # the huggingface_hub version
            if hasattr(result, "summary_text"):
                return result.summary_text
            if isinstance(result, list) and len(result) > 0:
                return result[0].get("summary_text", str(result[0]))
            if isinstance(result, dict):
                return result.get("summary_text", str(result))
            return str(result)
        except Exception:
            # Fall back to a truncated version of the input if summarization fails
            return f"Collection: {text[:80]}..."

# Example usage:
if __name__ == "__main__":
    generator = ImageEmbeddingGenerator()
    tags = ["nature", "sun", "ice cream"]
    description = "A sunny day in the park with children enjoying ice cream."
    caption = "Sunny day with ice cream."
    embedding = generator.generate_embedding(tags, description, caption)
    print("Embedding shape:", embedding.shape)