# Spaces: Running on Zero
import hashlib
import random
import re
import sqlite3
import traceback
from contextlib import closing
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional, Any

import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

from dog_database import get_dog_description
from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info
@dataclass
class BreedDescriptionVector:
    """Data structure holding the vectorised description of one breed.

    Declared as a dataclass so that it can be constructed with keyword
    arguments (breed_name=..., description_text=..., embedding=...,
    characteristics=...).
    """

    # Breed identifier, underscore-separated (e.g. "Golden_Retriever").
    breed_name: str
    # Natural-language description that was embedded.
    description_text: str
    # Embedding of description_text produced by the sentence encoder.
    embedding: np.ndarray
    # Raw breed attributes (size, exercise needs, grooming needs, ...).
    characteristics: Dict[str, Any]
class SemanticVectorManager:
    """
    Semantic vector manager.

    Handles SBERT model initialisation, construction of the per-breed
    embedding vectors and generation of the natural-language breed
    descriptions that are embedded.
    """

    # SQLite database the breed catalogue is read from.
    _DB_PATH = 'animal_detector.db'

    # Breed list used when the database cannot be queried.
    _FALLBACK_BREEDS = (
        'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever',
        'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
    )

    # Models tried in order until one loads.
    _MODEL_CANDIDATES = ('all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'all-MiniLM-L12-v2')

    def __init__(self):
        """Initialise the manager; the SBERT model itself is loaded lazily."""
        self.model_name = 'all-MiniLM-L6-v2'
        self.sbert_model = None
        self._sbert_loading_attempted = False
        self.breed_vectors = {}
        self.breed_list = self._get_breed_list()
        # SBERT loading is deferred until it is first needed, so that it
        # happens inside the GPU context.
        print("SemanticVectorManager initialized (SBERT loading deferred)")

    def _get_breed_list(self) -> List[str]:
        """Return the distinct breed names stored in the database.

        The wild species 'Dhole' and NULL/empty names are filtered out.
        If the database cannot be queried, a short built-in list of common
        breeds is returned instead.
        """
        try:
            # closing() guarantees the connection is released even when the
            # query raises (sqlite3's own context manager only commits).
            with closing(sqlite3.connect(self._DB_PATH)) as conn:
                rows = conn.execute(
                    "SELECT DISTINCT Breed FROM AnimalCatalog"
                ).fetchall()
        except sqlite3.Error as e:
            print(f"Error getting breed list: {str(e)}")
            return list(self._FALLBACK_BREEDS)
        return [row[0] for row in rows if row[0] and row[0] != 'Dhole']

    def _initialize_model(self):
        """Load the SBERT model with fallbacks (designed for ZeroGPU).

        Each candidate model is tried in turn on CUDA when available,
        otherwise on CPU. Loading is attempted at most once per instance;
        later calls return whatever the first attempt produced.

        Returns:
            The loaded model, or None when no candidate could be loaded.
        """
        if self.sbert_model is not None or self._sbert_loading_attempted:
            return self.sbert_model
        try:
            print("Loading SBERT model in GPU context...")
            # Pick the device explicitly to cope with the ZeroGPU environment.
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            for model_name in self._MODEL_CANDIDATES:
                try:
                    self.sbert_model = SentenceTransformer(model_name, device=device)
                except Exception as model_e:
                    print(f"Failed to load {model_name}: {str(model_e)}")
                    continue
                self.model_name = model_name
                print(f"SBERT model {model_name} loaded successfully on {device}")
                return self.sbert_model
            # Every candidate failed.
            print("All SBERT models failed to load. Using basic text matching fallback.")
            self.sbert_model = None
            return None
        except Exception as e:
            print(f"Failed to initialize any SBERT model: {str(e)}")
            print(traceback.format_exc())
            print("Will provide basic text-based recommendations without embeddings")
            self.sbert_model = None
            return None
        finally:
            # Remember the attempt so a failed load is not retried on every call.
            self._sbert_loading_attempted = True

    @staticmethod
    def _text_field(info, key: str, default: str) -> str:
        """Return info[key] when it is a non-blank string, else default."""
        value = info.get(key)
        if isinstance(value, str) and value.strip():
            return value
        return default

    @staticmethod
    def _intelligence_phrases(description_lower: str) -> List[str]:
        """Derive intelligence / purpose phrases from a lower-cased description."""
        phrases: List[str] = []
        if not description_lower:
            return phrases

        def mentions(*words: str) -> bool:
            return any(word in description_lower for word in words)

        # Trainability indicators.
        if mentions('intelligent', 'smart', 'clever', 'quick to learn'):
            phrases.extend(['highly intelligent', 'trainable', 'quick learner'])
        elif mentions('stubborn', 'independent', 'difficult to train'):
            phrases.extend(['independent minded', 'requires patience', 'challenging to train'])
        else:
            phrases.extend(['moderate intelligence', 'trainable with consistency'])
        # Working / companion purpose indicators.
        if mentions('working', 'herding', 'guard', 'hunting'):
            phrases.extend(['working breed', 'purpose-driven', 'task-oriented'])
        elif mentions('companion', 'lap', 'toy', 'decorative'):
            phrases.extend(['companion breed', 'affectionate', 'people-focused'])
        return phrases

    def _normalize_traits(self, breed: str, breed_info, noise_info) -> Dict[str, Any]:
        """Read every field the description needs once, applying defaults."""
        description = self._text_field(breed_info, 'Description', '')
        return {
            'display_name': breed.replace('_', ' '),
            'size': self._text_field(breed_info, 'Size', 'medium').lower(),
            'temperament': self._text_field(breed_info, 'Temperament', ''),
            'exercise': self._text_field(breed_info, 'Exercise Needs', 'moderate').lower(),
            'noise': self._text_field(noise_info, 'noise_level', 'moderate').lower(),
            'grooming': self._text_field(breed_info, 'Grooming Needs', 'moderate').lower(),
            'good_with_children': breed_info.get('Good with Children', 'Yes') == 'Yes',
            'description': description,
            'care_level': self._text_field(breed_info, 'Care Level', 'moderate').lower(),
            'lifespan': self._text_field(breed_info, 'Lifespan', '10-12 years'),
            'intelligence': self._intelligence_phrases(description.lower()),
        }

    @staticmethod
    def _trait_sentences(traits: Dict[str, Any]) -> List[str]:
        """Build the ordered sentence fragments describing a breed."""
        size = traits['size']
        exercise = traits['exercise']
        noise = traits['noise']
        grooming = traits['grooming']

        # 1. Basic size.
        parts = [f"{traits['display_name']} is a {size} sized dog breed"]

        # 2. Temperament.
        if traits['temperament']:
            parts.append(f"with a {traits['temperament'].lower()} temperament")

        # 3. Exercise level.
        if 'high' in exercise:
            parts.append("requiring high daily exercise and mental stimulation")
        elif 'low' in exercise or 'minimal' in exercise:
            parts.append("with minimal exercise requirements, suitable for apartment living")
        else:
            parts.append("with moderate exercise needs")

        # 4. Noise level.
        if 'low' in noise or 'quiet' in noise:
            parts.append("known for being quiet and rarely barking")
        elif 'high' in noise or 'loud' in noise:
            parts.append("tends to be vocal and bark frequently")
        else:
            parts.append("with moderate barking tendencies")

        # 5. Living-space compatibility.
        if size in ('small', 'tiny'):
            parts.append("excellent for small apartments and limited spaces")
        elif size in ('large', 'giant'):
            parts.append("requiring large living spaces and preferably a yard")
        else:
            parts.append("adaptable to various living situations")

        # 6. Grooming.
        if 'high' in grooming:
            parts.append("requiring regular professional grooming")
        elif 'low' in grooming:
            parts.append("with minimal grooming requirements")
        else:
            parts.append("with moderate grooming needs")

        # 7. Family compatibility.
        if traits['good_with_children']:
            parts.append("excellent with children and families")
        else:
            parts.append("better suited for adult households")

        # 8. Intelligence / trainability (only the first two phrases are used).
        if traits['intelligence']:
            parts.append(f"characterized as {', '.join(traits['intelligence'][:2])}")

        # 9. Leading 150 characters of the database description, plus any
        #    behavioural traits it mentions.
        excerpt = traits['description'][:150].lower()
        if excerpt:
            trait_triggers = (
                ('friendly', ('friendly',)),
                ('gentle', ('gentle',)),
                ('energetic', ('energetic', 'active')),
                ('calm', ('calm', 'peaceful')),
                ('protective', ('protective', 'guard')),
            )
            key_traits = [
                trait for trait, triggers in trait_triggers
                if any(trigger in excerpt for trigger in triggers)
            ]
            trait_text = f" and {', '.join(key_traits)}" if key_traits else ""
            parts.append(f"Known for: {excerpt}{trait_text}")

        # 10. Care level.
        parts.append(f"requiring {traits['care_level']} overall care level")

        # 11. Lifespan.
        parts.append(f"with a typical lifespan of {traits['lifespan']}")
        return parts

    @staticmethod
    def _semantic_keywords(traits: Dict[str, Any]) -> List[str]:
        """Collect matching keywords, deduplicated in first-seen order."""
        size = traits['size']
        exercise = traits['exercise']
        noise = traits['noise']
        grooming = traits['grooming']

        # Breed-name and temperament words.
        keywords = [word.lower() for word in traits['display_name'].split()]
        keywords.extend(word.lower().strip(',') for word in traits['temperament'].split())

        # Size.
        if 'small' in size or 'tiny' in size:
            keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap'])
        elif 'large' in size or 'giant' in size:
            keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor'])
        else:
            keywords.extend(['medium', 'moderate', 'average', 'balanced'])

        # Activity level.
        if 'high' in exercise:
            keywords.extend(['active', 'energetic', 'exercise', 'outdoor', 'hiking', 'running', 'athletic'])
        elif 'low' in exercise:
            keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary'])
        else:
            keywords.extend(['moderate', 'balanced', 'walks', 'regular'])

        # Noise level.
        if 'quiet' in noise or 'low' in noise:
            keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise'])
        elif 'high' in noise or 'loud' in noise:
            keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog'])

        # Living situation.
        if size in ('small', 'tiny') and 'low' in exercise:
            keywords.extend(['apartment', 'city', 'urban', 'small-space'])
        if size in ('large', 'giant') or 'high' in exercise:
            keywords.extend(['house', 'yard', 'suburban', 'rural', 'space'])

        # Family.
        if traits['good_with_children']:
            keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle'])

        # Intelligence / trainability.
        keywords.extend(
            word.lower() for phrase in traits['intelligence'] for word in phrase.split()
        )

        # Grooming.
        if 'high' in grooming:
            keywords.extend(['high-maintenance', 'professional-grooming', 'daily-brushing', 'coat-care'])
        elif 'low' in grooming:
            keywords.extend(['low-maintenance', 'minimal-grooming', 'easy-care', 'wash-and-go'])
        else:
            keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care'])

        # Lifespan: average the numbers found in e.g. "10-12 years".
        years = [int(number) for number in re.findall(r'\d+', traits['lifespan'])]
        if years:
            avg_years = sum(years) / len(years)
            if avg_years >= 14:
                keywords.extend(['long-lived', 'longevity', 'durable', 'healthy-lifespan'])
            elif avg_years <= 8:
                keywords.extend(['shorter-lifespan', 'health-considerations', 'special-care'])
            else:
                keywords.extend(['average-lifespan', 'moderate-longevity'])

        # dict.fromkeys dedupes while keeping insertion order; a plain set()
        # would make the keyword order (and hence the embedded text) vary
        # between processes because of string-hash randomisation.
        return list(dict.fromkeys(keywords))

    def _compose_description(self, breed: str, breed_info, noise_info) -> str:
        """Compose the full description text from already-fetched breed data.

        Args:
            breed: Breed identifier with underscores (e.g. "Golden_Retriever").
            breed_info: Mapping of breed attributes ('Size', 'Temperament', ...).
            noise_info: Mapping that may contain a 'noise_level' entry.

        Returns:
            Sentence fragments joined by '. ', followed by
            " Additional context: " and the space-separated keywords.
        """
        traits = self._normalize_traits(breed, breed_info, noise_info)
        full_description = '. '.join(self._trait_sentences(traits)) + '.'
        keyword_text = ' '.join(self._semantic_keywords(traits))
        return f"{full_description} Additional context: {keyword_text}"

    def _create_breed_description(self, breed: str) -> str:
        """Create a comprehensive natural-language description for a breed.

        Combines the database record and the noise information into a text
        suitable for embedding. Missing, empty or non-string fields fall back
        to neutral defaults. If anything goes wrong, a generic one-sentence
        description is returned instead of raising.
        """
        try:
            breed_info = get_dog_description(breed) or {}
            noise_info = breed_noise_info.get(breed, {}) if breed_noise_info else {}
            return self._compose_description(breed, breed_info, noise_info)
        except Exception as e:
            print(f"Error creating description for {breed}: {str(e)}")
            return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics."

    def _build_breed_vectors(self):
        """Build the embedding vector for every breed (called lazily).

        Does nothing when the SBERT model is unavailable. Vectors are
        collected locally and only published to self.breed_vectors once every
        breed has been processed, so a failure part-way through never leaves
        a partial cache behind.

        Raises:
            Exception: any error raised while encoding is logged and re-raised.
        """
        try:
            print("Building breed vector database...")
            # Initialise the model if that has not happened yet.
            if self.sbert_model is None:
                self._initialize_model()
            # Skip when the model is unavailable.
            if self.sbert_model is None:
                print("SBERT model not available, skipping vector building")
                return
            vectors = {}
            for breed in self.breed_list:
                description = self._create_breed_description(breed)
                embedding = self.sbert_model.encode(description, convert_to_tensor=False)
                breed_info = get_dog_description(breed) or {}
                characteristics = {
                    'size': breed_info.get('Size', 'Medium'),
                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                    'good_with_children': breed_info.get('Good with Children', 'Yes'),
                    'temperament': breed_info.get('Temperament', ''),
                }
                vectors[breed] = BreedDescriptionVector(
                    breed_name=breed,
                    description_text=description,
                    embedding=embedding,
                    characteristics=characteristics,
                )
            # Update in place so existing references to the dict stay valid.
            self.breed_vectors.update(vectors)
            print(f"Successfully built {len(self.breed_vectors)} breed vectors")
        except Exception as e:
            print(f"Error building breed vectors: {str(e)}")
            print(traceback.format_exc())
            raise

    def get_breed_vectors(self) -> Dict[str, "BreedDescriptionVector"]:
        """Return all breed vectors, building them first if necessary."""
        if not self.breed_vectors:
            self._build_breed_vectors()
        return self.breed_vectors

    def get_sbert_model(self) -> Optional["SentenceTransformer"]:
        """Return the SBERT model, or None if it has not been loaded."""
        return self.sbert_model

    def get_breed_list(self) -> List[str]:
        """Return the list of breed names."""
        return self.breed_list

    def is_model_available(self) -> bool:
        """Return True when an SBERT model is currently loaded."""
        return self.sbert_model is not None

    def encode_text(self, text: str) -> np.ndarray:
        """Encode text with the SBERT model, loading the model on first use.

        Raises:
            RuntimeError: if no SBERT model could be loaded.
        """
        if self.sbert_model is None:
            self._initialize_model()
        if self.sbert_model is None:
            raise RuntimeError("SBERT model not available")
        return self.sbert_model.encode(text, convert_to_tensor=False)