# penta-vit-experiments/legacy/vit_zana_v4_l1.py
"""
Baseline Vision Transformer with Frozen Pentachora Embeddings
Adapted for L1-normalized pentachora vertices
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from typing import Dict
class PentachoraEmbedding(nn.Module):
"""
A single frozen pentachora embedding (5 vertices in geometric space).
Supports both L1 and L2 normalized vertices.
"""
    def __init__(self, vertices: torch.Tensor, norm_type: str = 'l1'):
        super().__init__()
        assert norm_type in ('l1', 'l2'), f"Unknown norm_type: {norm_type}"
        self.embed_dim = vertices.shape[-1]
        self.norm_type = norm_type
        # Store the provided vertices as a frozen buffer; detach() guards against
        # grad-tracking inputs (buffers are never updated by the optimizer).
        self.register_buffer('vertices', vertices.detach().clone())
# Precompute normalized versions and centroid
with torch.no_grad():
            # For L1-normalized data, use the L1 norm for consistency
            if norm_type == 'l1':
                # L1 normalize (sum of absolute values = 1)
                self.register_buffer('vertices_norm',
                    self.vertices / (self.vertices.abs().sum(dim=-1, keepdim=True) + 1e-8))
else:
# L2 normalize (euclidean norm = 1)
self.register_buffer('vertices_norm', F.normalize(self.vertices, dim=-1))
self.register_buffer('centroid', self.vertices.mean(dim=0))
# Centroid normalization matches vertex normalization
if norm_type == 'l1':
self.register_buffer('centroid_norm',
self.centroid / (self.centroid.abs().sum() + 1e-8))
else:
self.register_buffer('centroid_norm', F.normalize(self.centroid, dim=-1))
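            # Worked example: v = [3., -1.] L1-normalizes to [0.75, -0.25]
            # (absolute values sum to 1) and L2-normalizes to ~[0.949, -0.316]
            # (unit Euclidean norm).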
def get_vertices(self) -> torch.Tensor:
"""Get all 5 vertices."""
return self.vertices
def get_centroid(self) -> torch.Tensor:
"""Get the centroid of the pentachora."""
return self.centroid
def compute_rose_score(self, features: torch.Tensor) -> torch.Tensor:
"""
Compute Rose similarity score with this pentachora.
Scaled appropriately for L1 norm.
"""
verts = self.vertices.unsqueeze(0) # [1, 5, D]
if features.dim() == 1:
features = features.unsqueeze(0)
        B = features.shape[0]
        verts = verts.expand(B, -1, -1)  # expand handles B == 1 as well
# For L1 norm, scale the rose score appropriately
score = PentachoronStabilizer.rose_score_magnitude(features, verts)
if self.norm_type == 'l1':
# L1 norm produces smaller values, so amplify the signal
score = score * 10.0
return score
def compute_similarity(self, features: torch.Tensor, mode: str = 'centroid') -> torch.Tensor:
"""
Compute similarity between features and this pentachora.
"""
if mode == 'rose':
return self.compute_rose_score(features)
# Normalize features according to norm type
if self.norm_type == 'l1':
features_norm = features / (features.abs().sum(dim=-1, keepdim=True) + 1e-8)
else:
features_norm = F.normalize(features, dim=-1)
if mode == 'centroid':
# Dot product with centroid
sim = torch.sum(features_norm * self.centroid_norm, dim=-1)
# Scale up L1 similarities to be comparable to L2
if self.norm_type == 'l1':
sim = sim * 10.0
return sim
else: # mode == 'max'
# Max similarity across vertices
sims = torch.matmul(features_norm, self.vertices_norm.T)
if self.norm_type == 'l1':
sims = sims * 10.0
return sims.max(dim=-1)[0]
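# Usage sketch (hypothetical shapes and values, not the training pipeline):
#   penta = PentachoraEmbedding(torch.randn(5, 256), norm_type='l1')
#   sims_c = penta.compute_similarity(torch.randn(8, 256), mode='centroid')  # [8]
#   sims_m = penta.compute_similarity(torch.randn(8, 256), mode='max')       # [8]
# mode='rose' additionally requires PentachoronStabilizer to be in scope.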
class TransformerBlock(nn.Module):
"""Standard transformer block with multi-head attention and MLP."""
def __init__(
self,
dim: int,
num_heads: int = 8,
mlp_ratio: float = 4.0,
dropout: float = 0.0,
attn_dropout: float = 0.0
):
super().__init__()
self.norm1 = nn.LayerNorm(dim)
self.attn = nn.MultiheadAttention(
dim,
num_heads,
dropout=attn_dropout,
batch_first=True
)
self.norm2 = nn.LayerNorm(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = nn.Sequential(
nn.Linear(dim, mlp_hidden_dim),
nn.GELU(),
nn.Dropout(dropout),
nn.Linear(mlp_hidden_dim, dim),
nn.Dropout(dropout)
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
# Self-attention
x_norm = self.norm1(x)
attn_out, _ = self.attn(x_norm, x_norm, x_norm)
x = x + attn_out
# MLP
x = x + self.mlp(self.norm2(x))
return x
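# Note: TransformerBlock uses the pre-norm (Pre-LN) layout: LayerNorm runs
# before attention and the MLP, with a residual connection around each
# sub-layer. Pre-LN is generally more stable to train at depth than post-LN.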
class BaselineViT(nn.Module):
"""
Vision Transformer with frozen pentachora embeddings.
Supports L1-normalized pentachora.
"""
def __init__(
self,
pentachora_list: list, # List of torch.Tensor, each [5, vocab_dim]
vocab_dim: int = 256,
img_size: int = 32,
patch_size: int = 4,
embed_dim: int = 512,
depth: int = 12,
num_heads: int = 8,
mlp_ratio: float = 4.0,
dropout: float = 0.0,
attn_dropout: float = 0.0,
similarity_mode: str = 'rose', # 'centroid', 'max', or 'rose'
norm_type: str = 'l1' # 'l1' or 'l2' normalization
):
super().__init__()
        # Validate pentachora list
        assert isinstance(pentachora_list, list), f"Expected list, got {type(pentachora_list)}"
        assert len(pentachora_list) > 0, "Empty pentachora list"
        for i, penta in enumerate(pentachora_list):
            assert isinstance(penta, torch.Tensor), f"Item {i} is not a tensor"
            assert penta.shape == (5, vocab_dim), \
                f"Item {i} has shape {tuple(penta.shape)}, expected (5, {vocab_dim})"
self.num_classes = len(pentachora_list)
self.embed_dim = embed_dim
self.num_patches = (img_size // patch_size) ** 2
self.similarity_mode = similarity_mode
self.pentachora_dim = vocab_dim
self.norm_type = norm_type
# Create individual pentachora embeddings from list
self.class_pentachora = nn.ModuleList([
PentachoraEmbedding(vertices=penta, norm_type=norm_type)
for penta in pentachora_list
])
# Patch embedding
self.patch_embed = nn.Conv2d(3, embed_dim, kernel_size=patch_size, stride=patch_size)
# CLS token - learnable
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
# Position embeddings
self.pos_embed = nn.Parameter(torch.zeros(1, 1 + self.num_patches, embed_dim))
self.pos_drop = nn.Dropout(dropout)
# Transformer blocks
self.blocks = nn.ModuleList([
TransformerBlock(
dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
dropout=dropout,
attn_dropout=attn_dropout
)
            for _ in range(depth)
])
# Final norm
self.norm = nn.LayerNorm(embed_dim)
# Project to pentachora dimension if needed
if self.pentachora_dim != embed_dim:
self.to_pentachora_dim = nn.Linear(embed_dim, self.pentachora_dim)
else:
self.to_pentachora_dim = nn.Identity()
# Temperature for similarity-based classification
# For L1 norm, start with lower temperature since similarities are scaled
if norm_type == 'l1':
self.temperature = nn.Parameter(torch.zeros(1)) # exp(0) = 1
else:
self.temperature = nn.Parameter(torch.ones(1) * np.log(1/0.07))
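        # For L2, exp(log(1/0.07)) ~= 14.3 at init (the CLIP-style logit scale);
        # for L1, exp(0) = 1, since L1 similarities are already rescaled (x10)
        # inside the similarity functions.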
# Precompute all centroids for efficiency
self.register_buffer(
'all_centroids',
torch.stack([penta.centroid for penta in self.class_pentachora])
)
# Normalize centroids according to norm type
if norm_type == 'l1':
centroids_normalized = self.all_centroids / (
self.all_centroids.abs().sum(dim=-1, keepdim=True) + 1e-8)
else:
centroids_normalized = F.normalize(self.all_centroids, dim=-1)
self.register_buffer('all_centroids_norm', centroids_normalized)
# Initialize weights
self.init_weights()
def init_weights(self):
"""Initialize model weights."""
nn.init.trunc_normal_(self.cls_token, std=0.02)
nn.init.trunc_normal_(self.pos_embed, std=0.02)
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.trunc_normal_(m.weight, std=0.02)
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
    def get_class_centroids(self) -> torch.Tensor:
        """Return the normalized per-class centroids, shape [num_classes, vocab_dim]."""
        return self.all_centroids_norm
def compute_pentachora_similarities(self, features: torch.Tensor) -> torch.Tensor:
"""
Compute similarities between features and all class pentachora.
Properly scaled for L1 or L2 norm.
"""
if self.similarity_mode == 'rose':
# Stack all vertices into single tensor for batch Rose scoring
all_vertices = torch.stack([penta.vertices for penta in self.class_pentachora])
features_exp = features.unsqueeze(1).expand(-1, self.num_classes, -1)
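            # After flattening, row b * num_classes + c pairs feature b with the
            # vertices of class c, so the reshape below yields [B, num_classes].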
scores = PentachoronStabilizer.rose_score_magnitude(
features_exp.reshape(-1, self.pentachora_dim),
all_vertices.repeat(features.shape[0], 1, 1)
).reshape(features.shape[0], -1)
# Scale for L1 norm
if self.norm_type == 'l1':
scores = scores * 10.0
return scores
else:
# Normalize features according to norm type
if self.norm_type == 'l1':
features_norm = features / (features.abs().sum(dim=-1, keepdim=True) + 1e-8)
else:
features_norm = F.normalize(features, dim=-1)
centroids = self.get_class_centroids()
sims = torch.matmul(features_norm, centroids.T)
# Scale for L1 norm
if self.norm_type == 'l1':
sims = sims * 10.0
return sims
def forward_features(self, x: torch.Tensor) -> torch.Tensor:
"""Extract features from images."""
B = x.shape[0]
# Patch embedding
x = self.patch_embed(x) # [B, embed_dim, H', W']
x = x.flatten(2).transpose(1, 2) # [B, num_patches, embed_dim]
# Add CLS token
cls_tokens = self.cls_token.expand(B, -1, -1)
x = torch.cat([cls_tokens, x], dim=1)
# Add position embeddings
x = x + self.pos_embed
x = self.pos_drop(x)
# Apply transformer blocks
for block in self.blocks:
x = block(x)
# Final norm
x = self.norm(x)
# Return CLS token
return x[:, 0]
def forward(self, x: torch.Tensor, return_features: bool = False) -> Dict[str, torch.Tensor]:
"""
Forward pass.
Returns dict with:
- logits: classification logits
- features: CLS features (if return_features=True)
- features_proj: projected features in pentachora space
- similarities: raw similarities to pentachora
"""
features = self.forward_features(x)
output = {}
# Project to pentachora dimension
features_proj = self.to_pentachora_dim(features)
# Apply appropriate normalization for projected features
if self.norm_type == 'l1':
# L1 normalize the projected features
features_proj = features_proj / (features_proj.abs().sum(dim=-1, keepdim=True) + 1e-8)
# Compute similarities
similarities = self.compute_pentachora_similarities(features_proj)
# Scale by temperature
logits = similarities * self.temperature.exp()
output['logits'] = logits
output['similarities'] = similarities
if return_features:
output['features'] = features # Original transformer features
output['features_proj'] = features_proj # Projected features
return output
# Test - requires external setup
if __name__ == "__main__":
print("BaselineViT requires:")
print(" 1. PentachoronStabilizer loaded externally")
print(" 2. pentachora_batch tensor [num_classes, 5, vocab_dim]")
print("\nNo random initialization. No fallbacks.")