Spaces:
Sleeping
Sleeping
File size: 2,635 Bytes
c6706bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
"""AI utilities for generating tags, captions, and embeddings"""
import hashlib
import random
from typing import List, Tuple

import numpy as np
def generate_tags(filename: str) -> List[str]:
    """
    Generate placeholder tags for an image based on its filename.

    In production, this would use CLIP or similar models. For now the
    result combines up to two words taken from the filename with three
    randomly sampled generic tags.

    Args:
        filename: Image filename; underscores and hyphens are treated as
            word separators and a trailing .jpg/.jpeg/.png is ignored.

    Returns:
        Up to 5 unique, non-empty, lowercase tags. Words from the filename
        come first, followed by the randomly chosen generic tags.
    """
    known_extensions = (".jpeg", ".jpg", ".png")

    # Extract meaningful words from the filename.
    words = filename.lower().replace("_", " ").replace("-", " ").split()
    name_parts = []
    for word in words:
        for ext in known_extensions:
            if word.endswith(ext):
                word = word[: -len(ext)]
                break
        # Filter AFTER stripping the extension: a word that consisted only
        # of an extension (e.g. "my_.jpg") must not become an empty tag.
        if word:
            name_parts.append(word)

    # Common image tags for demo
    common_tags = [
        "photo", "image", "landscape", "portrait", "nature", "architecture",
        "people", "animal", "food", "object", "abstract", "text", "sunset",
        "mountain", "beach", "forest", "urban", "indoor", "outdoor",
    ]

    # Filename words first, then a random subset of the generic tags.
    candidates = name_parts[:2] + random.sample(
        common_tags, min(3, len(common_tags))
    )

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # filename-derived tags reliably lead the list (a set would shuffle them).
    tags = list(dict.fromkeys(candidates))
    return tags[:5]  # Return up to 5 tags
def generate_caption(filename: str, tags: List[str]) -> str:
    """
    Build a placeholder caption for an image.

    In production, this would use BLIP or similar models. The current
    placeholder picks one of a few fixed sentence templates at random and
    fills it with the first tag.

    Args:
        filename: Image filename (not used by the placeholder logic).
        tags: Tags for the image; only the first one is used. When empty,
            the word "image" is used instead.

    Returns:
        The formatted caption string.
    """
    # Fall back to a generic subject when no tags are available.
    subject = "image"
    if tags:
        subject = tags[0]

    templates = (
        "A beautiful {tag} photograph",
        "Captured moment: {tag}",
        "Scenic view of {tag}",
        "Amazing {tag} scene",
        "Photography: {tag} collection",
    )
    chosen = random.choice(templates)
    return chosen.format(tag=subject)
def generate_embedding(filename: str, tags: List[str], caption: str) -> np.ndarray:
    """
    Generate a 512-dimensional embedding for semantic search.

    In production, this would use CLIP or similar models. Currently this
    returns a placeholder pseudo-random unit vector that is reproducible
    from the filename.

    Args:
        filename: Image filename; the only input that influences the result.
        tags: Image tags (not used by the placeholder logic).
        caption: Image caption (not used by the placeholder logic).

    Returns:
        A float32 array of shape (512,) normalized to unit length. The same
        filename always yields the same vector.
    """
    # Derive the seed from a stable digest: the built-in hash() of a str is
    # salted per process, so it cannot give reproducible results across runs.
    digest = hashlib.sha256(filename.encode("utf-8")).digest()
    seed = int.from_bytes(digest[:8], "big")

    # Use a local NumPy generator. Seeding the stdlib `random` module has no
    # effect on NumPy's random numbers and would disturb other users of the
    # global random state.
    rng = np.random.default_rng(seed)
    embedding = rng.standard_normal(512).astype(np.float32)

    # Normalize to unit vector
    embedding = embedding / np.linalg.norm(embedding)
    return embedding
def generate_filename_embedding(filename: str) -> np.ndarray:
    """
    Generate a deterministic embedding from filename for testing.

    Ensures the same filename always gets the same embedding.

    Args:
        filename: Filename used to derive the random seed.

    Returns:
        A float32 array of shape (512,) normalized to unit length.
    """
    # The built-in hash() of a str is salted per process, so take the seed
    # from a stable SHA-256 digest instead.
    digest = hashlib.sha256(filename.encode("utf-8")).digest()
    seed = int.from_bytes(digest[:8], "big")

    # Draw from a local NumPy generator: seeding the stdlib `random` module
    # does not influence NumPy's random numbers.
    rng = np.random.default_rng(seed)
    embedding = rng.standard_normal(512).astype(np.float32)

    # Normalize to unit vector
    embedding = embedding / np.linalg.norm(embedding)
    return embedding