diff --git a/app.py b/app.py index de56728219b2784fe3d417a63dabd701f408e150..877a944a084280095dd8b71674721de18d1252be 100644 --- a/app.py +++ b/app.py @@ -10,6 +10,11 @@ from torchvision.ops import nms, box_iou import torch.nn.functional as F from torchvision import transforms from PIL import Image, ImageDraw, ImageFont, ImageFilter +from sentence_transformers import SentenceTransformer +from urllib.parse import quote +from ultralytics import YOLO +import asyncio +import traceback from breed_health_info import breed_health_info from breed_noise_info import breed_noise_info from dog_database import get_dog_description @@ -20,7 +25,7 @@ from search_history import create_history_tab, create_history_component from styles import get_css_styles from breed_detection import create_detection_tab from breed_comparison import create_comparison_tab -from breed_recommendation import create_recommendation_tab +from breed_recommendation_enhanced import create_recommendation_tab from breed_visualization import create_visualization_tab from style_transfer import DogStyleTransfer, create_style_transfer_tab from html_templates import ( @@ -36,23 +41,24 @@ from html_templates import ( get_akc_breeds_link ) from model_architecture import BaseModel, dog_breeds -from urllib.parse import quote -from ultralytics import YOLO -import asyncio -import traceback + history_manager = UserHistoryManager() class ModelManager: """ - Singleton class for managing model instances and device allocation + Enhanced Singleton class for managing model instances and device allocation specifically designed for Hugging Face Spaces deployment. + Includes support for multi-dimensional recommendation system. 
""" _instance = None _initialized = False _yolo_model = None _breed_model = None _device = None + _sbert_model = None + _config_manager = None + _enhanced_system_initialized = False def __new__(cls): if cls._instance is None: @@ -64,6 +70,9 @@ class ModelManager: self._device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') ModelManager._initialized = True + # Initialize enhanced recommendation system + self._initialize_enhanced_system() + @property def device(self): if self._device is None: @@ -83,12 +92,12 @@ class ModelManager: num_classes=len(dog_breeds), device=self.device ).to(self.device) - + checkpoint = torch.load( 'ConvNextV2Base_best_model.pth', map_location=self.device ) - + # Try to load with model_state_dict first, then base_model if 'model_state_dict' in checkpoint: self._breed_model.load_state_dict(checkpoint['model_state_dict'], strict=False) @@ -98,10 +107,81 @@ class ModelManager: # If neither key exists, raise a descriptive error available_keys = list(checkpoint.keys()) if isinstance(checkpoint, dict) else "not a dictionary" raise KeyError(f"Model checkpoint does not contain 'model_state_dict' or 'base_model' keys. 
Available keys: {available_keys}") - + self._breed_model.eval() return self._breed_model + def _initialize_enhanced_system(self): + """Initialize enhanced multi-dimensional recommendation system""" + if ModelManager._enhanced_system_initialized: + return + + try: + # Initialize SBERT model for semantic analysis + try: + # Use default model configuration + model_name = 'all-MiniLM-L6-v2' + fallback_models = ['all-mpnet-base-v2', 'all-MiniLM-L12-v2'] + + for model_name_attempt in [model_name] + fallback_models: + try: + self._sbert_model = SentenceTransformer(model_name_attempt) + print(f"Initialized SBERT model: {model_name_attempt}") + break + except Exception as e: + print(f"Failed to load SBERT model {model_name_attempt}: {str(e)}") + continue + + if self._sbert_model is None: + print("All SBERT models failed to load, enhanced system will use keyword-only analysis") + + except Exception as e: + print(f"SBERT initialization failed: {str(e)}") + self._sbert_model = None + + ModelManager._enhanced_system_initialized = True + print("Enhanced recommendation system initialization completed") + + except ImportError as e: + print(f"Enhanced modules not available: {str(e)}") + ModelManager._enhanced_system_initialized = True # Mark as attempted + except Exception as e: + print(f"Enhanced system initialization failed: {str(e)}") + print(traceback.format_exc()) + ModelManager._enhanced_system_initialized = True # Mark as attempted + + @property + def sbert_model(self): + """Get SBERT model for semantic analysis""" + if not ModelManager._enhanced_system_initialized: + self._initialize_enhanced_system() + return self._sbert_model + + @property + def config_manager(self): + """Get configuration manager (simplified)""" + if not ModelManager._enhanced_system_initialized: + self._initialize_enhanced_system() + return None # Simplified - no config manager needed + + @property + def enhanced_system_available(self): + """Check if enhanced recommendation system is available""" + 
return (ModelManager._enhanced_system_initialized and + self._sbert_model is not None) + + def get_system_status(self): + """Get status of all managed models and systems""" + return { + 'device': str(self.device), + 'yolo_model_loaded': self._yolo_model is not None, + 'breed_model_loaded': self._breed_model is not None, + 'sbert_model_loaded': self._sbert_model is not None, + 'config_manager_available': False, # Simplified system + 'enhanced_system_initialized': ModelManager._enhanced_system_initialized, + 'enhanced_system_available': self.enhanced_system_available + } + # Initialize model manager model_manager = ModelManager() @@ -197,7 +277,7 @@ def detect_multiple_dogs(image, conf_threshold=0.3, iou_threshold=0.3): }) if not detected_boxes: - return [(image, 1, [0, 0, img_width, img_height], False)] + return [(image, 1.0, [0, 0, img_width, img_height], False)] # Phase 2: Analysis of detection relationships avg_height = sum(box['height'] for box in detected_boxes) / len(detected_boxes) @@ -211,7 +291,7 @@ def detect_multiple_dogs(image, conf_threshold=0.3, iou_threshold=0.3): y2 = min(box1['coords'][3], box2['coords'][3]) if x2 <= x1 or y2 <= y1: - return 0 + return 0.0 intersection = (x2 - x1) * (y2 - y1) area1 = box1['area'] @@ -328,7 +408,7 @@ def predict(image): print(f" Is dog: {is_dog}") print(f" Detection confidence: {detection_confidence:.4f}") - # 如果是狗且進行品種預測,在這裡也加入打印語句 + # 如果是狗且進行品種預測 if is_dog: top1_prob, topk_breeds, relative_probs = predict_single_dog(cropped_image) print(f" Breed prediction - Top probability: {top1_prob:.4f}") @@ -515,7 +595,8 @@ def main(): with gr.Tab("Style Transfer"): style_transfer_components = create_style_transfer_tab(dog_style_transfer) - # 6. History Search + + # 6. 
import math
from typing import Any, Dict, Optional
from dataclasses import dataclass


@dataclass
class UserPreferences:
    """Data structure holding the user's lifestyle and preference settings."""
    living_space: str  # "apartment", "house_small", "house_large"
    yard_access: str  # "no_yard", "shared_yard", "private_yard"
    exercise_time: int  # minutes per day
    exercise_type: str  # "light_walks", "moderate_activity", "active_training"
    grooming_commitment: str  # "low", "medium", "high"
    experience_level: str  # "beginner", "intermediate", "advanced"
    time_availability: str  # "limited", "moderate", "flexible"
    has_children: bool
    children_age: Optional[str]  # "toddler", "school_age", "teenager"; None when there are no children
    noise_tolerance: str  # "low", "medium", "high"
    space_for_play: bool
    other_pets: bool
    climate: str  # "cold", "moderate", "hot"
    health_sensitivity: str = "medium"
    barking_acceptance: Optional[str] = None  # falls back to noise_tolerance in __post_init__
    size_preference: str = "no_preference"  # "no_preference", "small", "medium", "large", "giant"
    training_commitment: str = "medium"  # "low", "medium", "high" - training effort the user will invest
    living_environment: str = "ground_floor"  # "ground_floor", "with_elevator", "walk_up"

    def __post_init__(self):
        # When no explicit barking acceptance is given, reuse the noise tolerance.
        if self.barking_acceptance is None:
            self.barking_acceptance = self.noise_tolerance


class BonusPenaltyEngine:
    """
    Bonus / penalty engine.

    Computes breed bonus scores, additional evaluation factors, the score
    distribution transform, and special-case score adjustments.
    """

    def __init__(self):
        """Initialize the engine (it holds no state)."""
        pass

    @staticmethod
    def calculate_breed_bonus(breed_info: dict, user_prefs: 'UserPreferences') -> float:
        """
        Compute the extra bonus (or penalty) for a breed.

        Args:
            breed_info: Breed information dictionary. Reads the keys
                'Temperament', 'Lifespan', 'Size' and 'Description'.
            user_prefs: User preference settings.

        Returns:
            float: Breed bonus, clamped to the range [-0.25, 0.5].
        """
        bonus = 0.0
        temperament = breed_info.get('Temperament', '').lower()

        # 1. Longevity bonus (max 0.05), based on the lower bound of the
        #    lifespan range, e.g. "12-14 years" -> 12. Lifespans whose lower
        #    bound is under 10 years yield a small negative contribution.
        try:
            lifespan = breed_info.get('Lifespan', '10-12 years')
            lower_bound_years = int(lifespan.split('-')[0].split()[0])
            bonus += min(0.05, (lower_bound_years - 10) * 0.01)
        except (AttributeError, IndexError, TypeError, ValueError):
            # Best effort: a missing or unparseable lifespan contributes nothing.
            pass

        # 2. Personality trait bonus (clamped to +/-0.15)
        positive_traits = {
            'friendly': 0.05,
            'gentle': 0.05,
            'patient': 0.05,
            'intelligent': 0.04,
            'adaptable': 0.04,
            'affectionate': 0.04,
            'easy-going': 0.03,
            'calm': 0.03
        }

        negative_traits = {
            'aggressive': -0.08,
            'stubborn': -0.06,
            'dominant': -0.06,
            'aloof': -0.04,
            'nervous': -0.05,
            'protective': -0.04
        }

        personality_score = sum(value for trait, value in positive_traits.items() if trait in temperament)
        personality_score += sum(value for trait, value in negative_traits.items() if trait in temperament)
        bonus += max(-0.15, min(0.15, personality_score))

        # 3. Adaptability bonus (max 0.1)
        # NOTE(review): section 6 below scores adaptability again ("enhanced
        # version"), so these traits are counted twice. Left unchanged because
        # removing either section would shift the tuned scores; confirm intent.
        adaptability_bonus = 0.0
        if breed_info.get('Size') == "Small" and user_prefs.living_space == "apartment":
            adaptability_bonus += 0.05
        if 'adaptable' in temperament or 'versatile' in temperament:
            adaptability_bonus += 0.05
        bonus += min(0.1, adaptability_bonus)

        # 4. Family compatibility (max 0.15, min -0.2)
        if user_prefs.has_children:
            family_traits = {
                'good with children': 0.06,
                'patient': 0.05,
                'gentle': 0.05,
                'tolerant': 0.04,
                'playful': 0.03
            }
            unfriendly_traits = {
                'aggressive': -0.08,
                'nervous': -0.07,
                'protective': -0.06,
                'territorial': -0.05
            }

            # Scale bonuses and penalties by the children's age group.
            age_adjustments = {
                'toddler': {'bonus_mult': 0.7, 'penalty_mult': 1.3},
                'school_age': {'bonus_mult': 1.0, 'penalty_mult': 1.0},
                'teenager': {'bonus_mult': 1.2, 'penalty_mult': 0.8}
            }

            adj = age_adjustments.get(user_prefs.children_age,
                                      {'bonus_mult': 1.0, 'penalty_mult': 1.0})

            family_bonus = sum(value for trait, value in family_traits.items()
                               if trait in temperament) * adj['bonus_mult']
            family_penalty = sum(value for trait, value in unfriendly_traits.items()
                                 if trait in temperament) * adj['penalty_mult']

            bonus += min(0.15, max(-0.2, family_bonus + family_penalty))

        # 5. Special skill bonus (max 0.1)
        skill_bonus = 0.0
        special_abilities = {
            'working': 0.03,
            'herding': 0.03,
            'hunting': 0.03,
            'tracking': 0.03,
            'agility': 0.02
        }
        for ability, value in special_abilities.items():
            if ability in temperament:
                skill_bonus += value
        bonus += min(0.1, skill_bonus)

        # 6. Adaptability evaluation (enhanced version)
        adaptability_bonus = 0.0
        if breed_info.get('Size') == "Small" and user_prefs.living_space == "apartment":
            adaptability_bonus += 0.08  # small dogs suit apartments better

        # Environmental adaptability
        if 'adaptable' in temperament or 'versatile' in temperament:
            if user_prefs.living_space == "apartment":
                adaptability_bonus += 0.10  # adaptability matters more in an apartment
            else:
                adaptability_bonus += 0.05  # still a bonus in other environments

        # Climate adaptability
        description = breed_info.get('Description', '').lower()
        climate = user_prefs.climate
        if climate == 'hot':
            if 'heat tolerant' in description or 'warm climate' in description:
                adaptability_bonus += 0.08
            elif 'thick coat' in description or 'cold climate' in description:
                adaptability_bonus -= 0.10
        elif climate == 'cold':
            if 'thick coat' in description or 'cold climate' in description:
                adaptability_bonus += 0.08
            elif 'heat tolerant' in description or 'short coat' in description:
                adaptability_bonus -= 0.10

        bonus += min(0.15, adaptability_bonus)

        return min(0.5, max(-0.25, bonus))

    @staticmethod
    def calculate_additional_factors(breed_info: dict, user_prefs: 'UserPreferences') -> dict:
        """
        Compute additional evaluation factors combining breed traits with user needs.

        Each factor is a float keyed by name:

        1. versatility          - breadth of the breed's abilities
        2. trainability         - learning and obedience ability
        3. energy_level         - energy and exercise requirement match
        4. grooming_needs       - care and maintenance requirement
        5. social_needs         - need for human interaction
        6. weather_adaptability - fit with the user's climate
        7. exercise_match       - fit with the user's exercise style
        8. lifestyle_fit        - fit with the user's space and time

        Args:
            breed_info: Breed information dictionary. Reads 'Temperament',
                'Description', 'Size', 'Exercise Needs' and 'Grooming Needs'.
            user_prefs: User preference settings.

        Returns:
            dict: Factor name -> score. Every factor is capped at 1.0.
        """
        factors = {
            'versatility': 0.0,
            'trainability': 0.0,
            'energy_level': 0.0,
            'grooming_needs': 0.0,
            'social_needs': 0.0,
            'weather_adaptability': 0.0,
            'exercise_match': 0.0,
            'lifestyle_fit': 0.0
        }

        temperament = breed_info.get('Temperament', '').lower()
        description = breed_info.get('Description', '').lower()
        size = breed_info.get('Size', 'Medium')

        # 1. Versatility - traits from temperament, roles from description
        versatile_traits = {
            'intelligent': 0.25,
            'adaptable': 0.25,
            'trainable': 0.20,
            'athletic': 0.15,
            'versatile': 0.15
        }

        working_roles = {
            'working': 0.20,
            'herding': 0.15,
            'hunting': 0.15,
            'sporting': 0.15,
            'companion': 0.10
        }

        trait_score = sum(value for trait, value in versatile_traits.items()
                          if trait in temperament)

        role_score = sum(value for role, value in working_roles.items()
                         if role in description)

        # Adjust versatility by the traits that suit the user's exercise style.
        purpose_traits = {
            'light_walks': ['calm', 'gentle', 'easy-going'],
            'moderate_activity': ['adaptable', 'balanced', 'versatile'],
            'active_training': ['intelligent', 'trainable', 'working']
        }

        if user_prefs.exercise_type in purpose_traits:
            matching_traits = sum(1 for trait in purpose_traits[user_prefs.exercise_type]
                                  if trait in temperament)
            trait_score += matching_traits * 0.15

        factors['versatility'] = min(1.0, trait_score + role_score)

        # 2. Trainability
        trainable_traits = {
            'intelligent': 0.3,
            'eager to please': 0.3,
            'trainable': 0.2,
            'quick learner': 0.2,
            'obedient': 0.2
        }

        base_trainability = sum(value for trait, value in trainable_traits.items()
                                if trait in temperament)

        # Weight trainability by owner experience.
        experience_multipliers = {
            'beginner': 1.2,  # beginners benefit most from an easy-to-train dog
            'intermediate': 1.0,
            'advanced': 0.8  # experts can handle harder-to-train dogs
        }

        factors['trainability'] = min(1.0, base_trainability *
                                      experience_multipliers.get(user_prefs.experience_level, 1.0))

        # 3. Energy level
        exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper()
        energy_levels = {
            'VERY HIGH': {
                'score': 1.0,
                'min_exercise': 120,
                'ideal_exercise': 150
            },
            'HIGH': {
                'score': 0.8,
                'min_exercise': 90,
                'ideal_exercise': 120
            },
            'MODERATE': {
                'score': 0.6,
                'min_exercise': 60,
                'ideal_exercise': 90
            },
            'LOW': {
                'score': 0.4,
                'min_exercise': 30,
                'ideal_exercise': 60
            }
        }

        breed_energy = energy_levels.get(exercise_needs, energy_levels['MODERATE'])

        # Match the user's exercise time against the breed's ideal amount.
        if user_prefs.exercise_time >= breed_energy['ideal_exercise']:
            energy_score = breed_energy['score']
        else:
            # Not enough exercise time: scale the score down proportionally,
            # but never below 40% of the breed's score.
            deficit_ratio = max(0.4, user_prefs.exercise_time / breed_energy['ideal_exercise'])
            energy_score = breed_energy['score'] * deficit_ratio

        factors['energy_level'] = energy_score

        # 4. Grooming needs
        grooming_needs = breed_info.get('Grooming Needs', 'MODERATE').upper()
        grooming_levels = {
            'HIGH': 1.0,
            'MODERATE': 0.6,
            'LOW': 0.3
        }

        # Coat types that add grooming work.
        coat_adjustments = 0
        if 'long coat' in description:
            coat_adjustments += 0.2
        if 'double coat' in description:
            coat_adjustments += 0.15
        if 'curly' in description:
            coat_adjustments += 0.15

        # Weight by the user's grooming commitment.
        commitment_multipliers = {
            'low': 1.5,  # low commitment amplifies the impact of grooming needs
            'medium': 1.0,
            'high': 0.8  # high commitment dampens the impact of grooming needs
        }

        base_grooming = grooming_levels.get(grooming_needs, 0.6) + coat_adjustments
        factors['grooming_needs'] = min(1.0, base_grooming *
                                        commitment_multipliers.get(user_prefs.grooming_commitment, 1.0))

        # 5. Social needs
        social_traits = {
            'friendly': 0.25,
            'social': 0.25,
            'affectionate': 0.20,
            'people-oriented': 0.20
        }

        antisocial_traits = {
            'independent': -0.20,
            'aloof': -0.20,
            'reserved': -0.15
        }

        social_score = sum(value for trait, value in social_traits.items()
                           if trait in temperament)
        antisocial_score = sum(value for trait, value in antisocial_traits.items()
                               if trait in temperament)

        # Household adjustment
        if user_prefs.has_children:
            child_friendly_bonus = 0.2 if 'good with children' in temperament else 0
            social_score += child_friendly_bonus

        factors['social_needs'] = min(1.0, max(0.0, social_score + antisocial_score))

        # 6. Weather adaptability
        climate_traits = {
            'cold': {
                'positive': ['thick coat', 'winter', 'cold climate'],
                'negative': ['short coat', 'heat sensitive']
            },
            'hot': {
                'positive': ['short coat', 'heat tolerant', 'warm climate'],
                'negative': ['thick coat', 'cold climate']
            },
            'moderate': {
                'positive': ['adaptable', 'all climate'],
                'negative': []
            }
        }

        climate_score = 0.4  # base score
        if user_prefs.climate in climate_traits:
            # Positive terms add to the score ...
            climate_score += sum(0.2 for term in climate_traits[user_prefs.climate]['positive']
                                 if term in description)
            # ... negative terms subtract from it.
            climate_score -= sum(0.2 for term in climate_traits[user_prefs.climate]['negative']
                                 if term in description)

        factors['weather_adaptability'] = min(1.0, max(0.0, climate_score))

        # 7. Exercise type match
        exercise_type_traits = {
            'light_walks': ['calm', 'gentle'],
            'moderate_activity': ['adaptable', 'balanced'],
            'active_training': ['athletic', 'energetic']
        }

        if user_prefs.exercise_type in exercise_type_traits:
            match_score = sum(0.25 for trait in exercise_type_traits[user_prefs.exercise_type]
                              if trait in temperament)
            factors['exercise_match'] = min(1.0, match_score + 0.5)  # base score 0.5

        # 8. Lifestyle fit
        lifestyle_score = 0.5  # base score

        # Space fit
        if user_prefs.living_space == 'apartment':
            if size == 'Small':
                lifestyle_score += 0.2
            elif size in ('Large', 'Giant'):
                # Giant breeds are penalised like large ones, matching how the
                # house_large branch below groups the two sizes.
                lifestyle_score -= 0.2
        elif user_prefs.living_space == 'house_large':
            if size in ['Large', 'Giant']:
                lifestyle_score += 0.2

        # Time availability fit
        time_availability_bonus = {
            'limited': -0.1,
            'moderate': 0,
            'flexible': 0.1
        }
        lifestyle_score += time_availability_bonus.get(user_prefs.time_availability, 0)

        factors['lifestyle_fit'] = min(1.0, max(0.0, lifestyle_score))

        return factors

    def amplify_score_extreme(self, score: float) -> float:
        """
        Reshape the score distribution into a more meaningful range.

        This is a pure mathematical transform; it uses no external data.
        The input is clamped to [0, 1] first, so out-of-range scores cannot
        produce results above 1.0 or overflow the sigmoid.

        Parameters:
            score: Raw score (expected to be a float between 0 and 1).

        Returns:
            float: Adjusted score, roughly in the range 0.70-1.0.
        """
        def smooth_curve(x: float, steepness: float = 12) -> float:
            """Smooth S-shaped (logistic) curve centred on x = 0.5."""
            return 1 / (1 + math.exp(-steepness * (x - 0.5)))

        # Enforce the documented input contract.
        score = max(0.0, min(1.0, score))

        # 0.90-1.00: excellent match
        if score >= 0.90:
            position = (score - 0.90) / 0.10
            return 0.96 + (position * 0.04)

        # 0.80-0.90: very good match
        elif score >= 0.80:
            position = (score - 0.80) / 0.10
            return 0.90 + (position * 0.06)

        # 0.70-0.80: good match
        elif score >= 0.70:
            position = (score - 0.70) / 0.10
            return 0.82 + (position * 0.08)

        # 0.50-0.70: acceptable match
        elif score >= 0.50:
            position = (score - 0.50) / 0.20
            return 0.75 + (smooth_curve(position) * 0.07)

        # below 0.50: poor match
        else:
            position = score / 0.50
            return 0.70 + (smooth_curve(position) * 0.05)

    def apply_special_case_adjustments(self, score: float, user_prefs: UserPreferences, breed_info: dict) -> float:
        """
        Adjust a score for special and extreme cases.

        Three multipliers are computed and applied to the score:
        1. the combined effect of living space and exercise needs,
        2. the combined effect of owner experience and breed difficulty,
        3. breed-specific exercise requirements.

        Parameters:
            score: Initial score.
            user_prefs: User preferences.
            breed_info: Breed information dictionary. Reads 'Temperament',
                'Description', 'Size', 'Exercise Needs' and 'Care Level';
                all keys are optional.

        Returns:
            float: Adjusted score, clamped to the range [0.2, 1.0].
        """
        severity_multiplier = 1.0

        def evaluate_spatial_exercise_combination() -> float:
            """
            Evaluate the combined effect of living space and exercise needs.

            Large and giant breeds in an apartment are penalised (less so when
            the breed shows apartment-friendly traits); ample exercise time
            partly compensates for high-energy breeds.
            """
            multiplier = 1.0

            if user_prefs.living_space == 'apartment':
                temperament = breed_info.get('Temperament', '').lower()
                description = breed_info.get('Description', '').lower()

                # Does the breed show traits that favour apartment living?
                apartment_friendly = any(trait in temperament or trait in description
                                         for trait in ['calm', 'adaptable', 'quiet'])

                # Large / giant breeds. `.get` avoids a KeyError when the
                # breed record has no 'Size' entry.
                if breed_info.get('Size') in ['Large', 'Giant']:
                    if apartment_friendly:
                        multiplier *= 0.85
                    else:
                        multiplier *= 0.75

                # Exercise needs match
                exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper()
                exercise_time = user_prefs.exercise_time

                if exercise_needs in ['HIGH', 'VERY HIGH']:
                    if exercise_time >= 120:  # plenty of exercise partly offsets the space limit
                        multiplier *= 1.1

            return multiplier

        def evaluate_experience_combination() -> float:
            """
            Evaluate the compound impact of owner experience.

            Only beginners are adjusted: high-care breeds and challenging
            temperament traits reduce the multiplier.
            """
            multiplier = 1.0
            temperament = breed_info.get('Temperament', '').lower()
            # Normalise case like 'Exercise Needs' so 'High' and 'HIGH' match.
            care_level = breed_info.get('Care Level', 'MODERATE').upper()

            if user_prefs.experience_level == 'beginner':
                if care_level == 'HIGH':
                    if user_prefs.has_children:
                        multiplier *= 0.7
                    else:
                        multiplier *= 0.8

                # Challenging temperament traits, each a fractional reduction.
                challenging_traits = {
                    'stubborn': -0.10,
                    'independent': -0.08,
                    'dominant': -0.08,
                    'protective': -0.06,
                    'aggressive': -0.15  # kept higher for safety reasons
                }

                for trait, penalty in challenging_traits.items():
                    if trait in temperament:
                        multiplier *= (1 + penalty)

            return multiplier

        def evaluate_breed_specific_requirements() -> float:
            """
            Evaluate breed-specific exercise requirements against the user's
            exercise time and exercise type.
            """
            multiplier = 1.0
            exercise_time = user_prefs.exercise_time
            exercise_type = user_prefs.exercise_type

            temperament = breed_info.get('Temperament', '').lower()
            exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper()

            # Exercise needs vs. available exercise time
            if exercise_needs == 'LOW':
                if exercise_time > 120:
                    multiplier *= 0.85
            elif exercise_needs == 'VERY HIGH':
                if exercise_time < 60:
                    multiplier *= 0.7

            # Special breed types
            if 'sprint' in temperament:
                if exercise_time > 120 and exercise_type != 'active_training':
                    multiplier *= 0.85

            if any(trait in temperament for trait in ['working', 'herding']):
                if exercise_time < 90 or exercise_type == 'light_walks':
                    multiplier *= 0.8

            return multiplier

        # Compute each adjustment
        space_exercise_mult = evaluate_spatial_exercise_combination()
        experience_mult = evaluate_experience_combination()
        breed_specific_mult = evaluate_breed_specific_requirements()

        # Combine all adjustment factors
        severity_multiplier *= space_exercise_mult
        severity_multiplier *= experience_mult
        severity_multiplier *= breed_specific_mult

        # Apply the final adjustment and keep the score in a sane range
        final_score = score * severity_multiplier
        return max(0.2, min(1.0, final_score))
create_description_examples(): + """Create HTML for description examples with dynamic visibility""" + return """ +
+

💡 Example Descriptions - Try These Expression Styles:

+ +
+
+ 🏠 Living Environment:
+ + "I live in an apartment and need a quiet, small dog that's good with children" + +
+ +
+ 🎾 Activity Preferences:
+ + "I want an active medium to large dog for hiking and outdoor activities" + +
+ +
+ ❤️ Breed Preferences:
+ + "I love Border Collies most, then Golden Retrievers, followed by Pugs" + +
+ +
+ 👥 Family Situation:
+ + "Looking for a calm, low-maintenance companion dog for elderly person" + +
+
+ +
+ 🔍 Tips: + + Please describe in English, including living environment, preferred breeds, family situation, activity needs, etc. The more detailed your description, the more accurate the recommendations! + +
+
+ """ + + +def create_recommendation_tab( + UserPreferences, + get_breed_recommendations, + format_recommendation_html, + history_component +): + """Create the enhanced breed recommendation tab with natural language support""" + + with gr.TabItem("Breed Recommendation"): + with gr.Tabs(): + # -------------------------- + # Tab 1: Find by Criteria + # -------------------------- + with gr.Tab("Find by Criteria"): + gr.HTML(""" +
+ +

+ Tell us about your lifestyle, and we'll recommend the perfect dog breeds for you! +

+
+ 🔬 + The matching algorithm is continuously improving. Results are for reference only. +
+
+ """) + + with gr.Row(): + with gr.Column(): + living_space = gr.Radio( + choices=["apartment", "house_small", "house_large"], + label="What type of living space do you have?", + info="Choose your current living situation", + value="apartment" + ) + + yard_access = gr.Radio( + choices=["no_yard", "shared_yard", "private_yard"], + label="Yard Access Type", + info="Available outdoor space", + value="no_yard" + ) + + exercise_time = gr.Slider( + minimum=0, + maximum=180, + value=60, + label="Daily exercise time (minutes)", + info="Consider walks, play time, and training" + ) + + exercise_type = gr.Radio( + choices=["light_walks", "moderate_activity", "active_training"], + label="Exercise Style", + info="What kind of activities do you prefer?", + value="moderate_activity" + ) + + grooming_commitment = gr.Radio( + choices=["low", "medium", "high"], + label="Grooming commitment level", + info="Low: monthly, Medium: weekly, High: daily", + value="medium" + ) + + with gr.Column(): + size_preference = gr.Radio( + choices=["no_preference", "small", "medium", "large", "giant"], + label="Preference Dog Size", + info="Select your preferred dog size - this will strongly filter the recommendations", + value="no_preference" + ) + experience_level = gr.Radio( + choices=["beginner", "intermediate", "advanced"], + label="Dog ownership experience", + info="Be honest - this helps find the right match", + value="beginner" + ) + + time_availability = gr.Radio( + choices=["limited", "moderate", "flexible"], + label="Time Availability", + info="Time available for dog care daily", + value="moderate" + ) + + has_children = gr.Checkbox( + label="Have children at home", + info="Helps recommend child-friendly breeds" + ) + + children_age = gr.Radio( + choices=["toddler", "school_age", "teenager"], + label="Children's Age Group", + info="Helps match with age-appropriate breeds", + visible=False + ) + + noise_tolerance = gr.Radio( + choices=["low", "medium", "high"], + label="Noise tolerance 
level", + info="Some breeds are more vocal than others", + value="medium" + ) + + def update_children_age_visibility(has_children_val): + """Update children age visibility based on has_children checkbox""" + return gr.update(visible=has_children_val) + + has_children.change( + fn=update_children_age_visibility, + inputs=[has_children], + outputs=[children_age] + ) + + # --------- 條件搜尋--------- + def find_breed_matches( + living_space, yard_access, exercise_time, exercise_type, + grooming_commitment, size_preference, experience_level, + time_availability, has_children, children_age, noise_tolerance + ): + """Process criteria-based breed matching and persist history""" + try: + # 1) 建立偏好 + user_prefs = UserPreferences( + living_space=living_space, + yard_access=yard_access, + exercise_time=exercise_time, + exercise_type=exercise_type, + grooming_commitment=grooming_commitment, + size_preference=size_preference, + experience_level=experience_level, + time_availability=time_availability, + has_children=has_children, + children_age=children_age if has_children else None, + noise_tolerance=noise_tolerance, + # 其他欄位依原始設計 + space_for_play=(living_space != "apartment"), + other_pets=False, + climate="moderate", + health_sensitivity="medium", + barking_acceptance=noise_tolerance + ) + + # 2) 取得推薦 + recommendations = get_breed_recommendations(user_prefs) + print(f"[CRITERIA] generated={len(recommendations) if recommendations else 0}") + + if not recommendations: + return format_recommendation_html([], is_description_search=False) + + # 3) 準備歷史資料(final_score / overall_score 同步) + history_results = [] + for idx, rec in enumerate(recommendations, start=1): + final_score = rec.get("final_score", rec.get("overall_score", 0)) + overall_score = final_score # Ensure consistency + history_results.append({ + "breed": rec.get("breed", "Unknown"), + "rank": rec.get("rank", idx), + "final_score": final_score, + "overall_score": overall_score, + "base_score": rec.get("base_score", 0), + 
"bonus_score": rec.get("bonus_score", 0), + "scores": rec.get("scores", {}) + }) + + prefs_dict = user_prefs.__dict__ if hasattr(user_prefs, "__dict__") else user_prefs + + # 4) 寫入歷史(criteria) + try: + ok = history_component.save_search( + user_preferences=prefs_dict, + results=history_results, + search_type="criteria", + description=None + ) + print(f"[CRITERIA SAVE] ok={ok}, saved={len(history_results)}") + except Exception as e: + print(f"[CRITERIA SAVE][ERROR] {str(e)}") + + # 5) 顯示結果 + return format_recommendation_html(recommendations, is_description_search=False) + + except Exception as e: + print(f"[CRITERIA][ERROR] {str(e)}") + print(traceback.format_exc()) + return f""" +
+

⚠️ Error generating recommendations

+

We encountered an issue while processing your preferences.

+

Error details: {str(e)}

+
+ """ + + find_button = gr.Button("🔍 Find My Perfect Match!", elem_id="find-match-btn", size="lg") + criteria_results = gr.HTML(label="Breed Recommendations") + find_button.click( + fn=find_breed_matches, + inputs=[living_space, yard_access, exercise_time, exercise_type, + grooming_commitment, size_preference, experience_level, + time_availability, has_children, children_age, noise_tolerance], + outputs=criteria_results + ) + + # -------------------------- + # Tab 2: Find by Description + # -------------------------- + with gr.Tab("Find by Description") as description_tab: + gr.HTML(""" +
+
NEW
+

+ Describe your needs in natural language, and AI will find the most suitable breeds! +

+
+ 🚀 + New Feature: Based on advanced semantic understanding technology, making search more aligned with your real needs! +
+
+ """) + + examples_display = gr.HTML(create_description_examples()) + + description_input = gr.Textbox( + label="🗣️ Please describe your needs", + placeholder=("Example: I live in an apartment and need a quiet, small dog that's good with children. " + "I prefer Border Collies and Golden Retrievers..."), + lines=4, + max_lines=6, + elem_classes=["description-input"] + ) + + validation_status = gr.HTML(visible=False) + + # Accuracy disclaimer + gr.HTML(""" +
+
+ +
+
+
+ Accuracy Continuously Improving - Use as Reference Guide +
+
+ The AI recommendation system is constantly learning and improving. Use these recommendations as a helpful reference for your pet adoption. +
+
+
+ """) + + def validate_description_input(text): + """Validate description input""" + try: + nlp = get_nlp_processor() + validation = nlp.validate_input(text) + if validation.get("is_valid", True): + return gr.update(visible=False), True + else: + error_html = f""" +
+ ⚠️ {validation.get('error', 'Invalid input')}
+ {"
".join(f"• {s}" for s in validation.get('suggestions', []))} +
+ """ + return gr.update(value=error_html, visible=True), False + except Exception as e: + # 無 NLP 驗證也可放行 + print(f"[DESC][VALIDATE][WARN] {str(e)}") + return gr.update(visible=False), True + + def find_breeds_by_description(description_text): + """Find breeds based on description and persist history""" + try: + if not description_text or not description_text.strip(): + return """ +
+

Please enter your description to get personalized recommendations

+
+ """ + + # 驗證(若可用) + try: + nlp = get_nlp_processor() + validation = nlp.validate_input(description_text) + if not validation.get("is_valid", True): + return f""" +
+

⚠️ Input validation failed

+

{validation.get('error','Invalid input')}

+ +
+ """ + except Exception as e: + print(f"[DESC][VALIDATE][WARN] {str(e)} (skip validation)") + + # 取得增強語意推薦 + recommendations = get_enhanced_recommendations_with_unified_scoring( + user_description=description_text, + top_k=15 + ) + print(f"[DESC] generated={len(recommendations) if recommendations else 0}") + + if not recommendations: + return """ +
+

😔 No matching breeds found

+

No dog breeds match your specific requirements. Please try:

+ +
+ """ + + # 準備歷史資料 + def _to_float(x, default=0.0): + try: + return float(x) + except Exception: + return default + + history_results = [] + for i, rec in enumerate(recommendations, start=1): + final_score = _to_float(rec.get("final_score", rec.get("overall_score", 0))) + overall_score = final_score # Ensure consistency between final_score and overall_score + history_results.append({ + "breed": str(rec.get("breed", "Unknown")), + "rank": int(rec.get("rank", i)), + "final_score": final_score, + "overall_score": overall_score, + "semantic_score": _to_float(rec.get("semantic_score", 0)), + "comparative_bonus": _to_float(rec.get("comparative_bonus", 0)), + "lifestyle_bonus": _to_float(rec.get("lifestyle_bonus", 0)), + "size": str(rec.get("size", "Unknown")), + "scores": rec.get("scores", {}) + }) + + # 寫入歷史(description) + try: + ok = history_component.save_search( + user_preferences=None, + results=history_results, + search_type="description", + description=description_text + ) + print(f"[DESC SAVE] ok={ok}, saved={len(history_results)}") + except Exception as e: + print(f"[DESC SAVE][ERROR] {str(e)}") + + # 使用統一HTML格式化器顯示增強推薦結果 + html_output = format_unified_recommendation_html(recommendations, is_description_search=True) + return html_output + + except RuntimeError as e: + error_msg = str(e) + print(f"[DESC][RUNTIME_ERROR] {error_msg}") + return f""" +
+

🔧 System Configuration Issue

+

+ {error_msg.replace(chr(10), '
').replace('•', '•')} +

+
+

💡 What you can try:

+ +
+
+ """ + except ValueError as e: + error_msg = str(e) + print(f"[DESC][VALUE_ERROR] {error_msg}") + return f""" +
+

🔍 No Matching Results

+

+ {error_msg} +

+
+

💡 Suggestions to get better results:

+ +
+
+ """ + except Exception as e: + error_msg = str(e) + print(f"[DESC][ERROR] {error_msg}") + print(traceback.format_exc()) + return f""" +
+

⚠️ Unexpected Error

+

An unexpected error occurred while processing your description.

+
+ Show technical details +
{error_msg}
+
+

Please try the "Find by Criteria" tab or contact support.

+
+ """ + + description_input.change( + fn=lambda x: validate_description_input(x)[0], + inputs=[description_input], + outputs=[validation_status] + ) + + description_button = gr.Button("🤖 Smart Breed Recommendation", elem_id="find-by-description-btn", size="lg") + description_results = gr.HTML(label="AI Breed Recommendations") + + description_button.click( + fn=find_breeds_by_description, + inputs=[description_input], + outputs=[description_results] + ) + + return { + 'criteria_results': locals().get('criteria_results'), + 'description_results': locals().get('description_results'), + 'description_input': locals().get('description_input') + } diff --git a/config_manager.py b/config_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..76a57ddbdc17572da4bdc04ac675c3ecca564537 --- /dev/null +++ b/config_manager.py @@ -0,0 +1,554 @@ +import json +import sqlite3 +import numpy as np +from typing import Dict, List, Tuple, Any, Optional, Union +from dataclasses import dataclass, field, asdict +from enum import Enum +import os +import traceback +from dog_database import get_dog_description +from breed_health_info import breed_health_info +from breed_noise_info import breed_noise_info + +class DataQuality(Enum): + """資料品質等級""" + HIGH = "high" # 完整且可靠的資料 + MEDIUM = "medium" # 部分資料或推斷資料 + LOW = "low" # 不完整或不確定的資料 + UNKNOWN = "unknown" # 未知或缺失資料 + +@dataclass +class BreedStandardization: + """品種標準化資料結構""" + canonical_name: str + display_name: str + aliases: List[str] = field(default_factory=list) + size_category: int = 1 # 1=tiny, 2=small, 3=medium, 4=large, 5=giant + exercise_level: int = 2 # 1=low, 2=moderate, 3=high, 4=very_high + noise_level: int = 2 # 1=low, 2=moderate, 3=high + care_complexity: int = 2 # 1=low, 2=moderate, 3=high + child_compatibility: float = 0.5 # 0=no, 0.5=unknown, 1=yes + data_quality_scores: Dict[str, DataQuality] = field(default_factory=dict) + confidence_flags: Dict[str, float] = field(default_factory=dict) + +@dataclass 
class ConfigurationSettings:
    """Container for the tunable settings of the recommendation system."""
    # Relative weight of each compatibility dimension in the final score.
    scoring_weights: Dict[str, float] = field(default_factory=dict)
    # Score calibration options (target range, tie breaking, ...).
    calibration_settings: Dict[str, Any] = field(default_factory=dict)
    # Thresholds used by hard constraints.
    constraint_thresholds: Dict[str, float] = field(default_factory=dict)
    # Sentence-embedding model selection and caching options.
    semantic_model_config: Dict[str, Any] = field(default_factory=dict)
    # Per breed-group defaults used when breed data looks like a placeholder.
    data_imputation_rules: Dict[str, Any] = field(default_factory=dict)
    debug_mode: bool = False
    version: str = "1.0.0"


class ConfigManager:
    """
    Centralised configuration and breed-data standardisation manager.

    Handles breed data standardisation, configuration management and
    data-quality assessment.
    """

    def __init__(self, config_file: Optional[str] = None):
        """Build defaults, standardise every breed, then apply the config file.

        Args:
            config_file: Path of a JSON file with overrides; defaults to
                ``config.json``. The file is only read when it exists.
        """
        self.config_file = config_file or "config.json"
        self.breed_standardization = {}
        self.configuration = ConfigurationSettings()
        self.breed_aliases = {}
        self._load_default_configuration()
        self._initialize_breed_standardization()

        # Try to load a custom configuration on top of the defaults.
        if os.path.exists(self.config_file):
            self._load_configuration()

    def _load_default_configuration(self):
        """Install the built-in default configuration."""
        self.configuration = ConfigurationSettings(
            scoring_weights={
                'activity_compatibility': 0.35,
                'noise_compatibility': 0.25,
                'spatial_compatibility': 0.15,
                'family_compatibility': 0.10,
                'maintenance_compatibility': 0.10,
                'size_compatibility': 0.05
            },
            calibration_settings={
                'target_range_min': 0.45,
                'target_range_max': 0.95,
                'min_effective_range': 0.3,
                'auto_calibration': True,
                'tie_breaking_enabled': True
            },
            constraint_thresholds={
                'apartment_size_limit': 3,      # largest allowed size (medium)
                'high_exercise_threshold': 3,   # high exercise-need threshold
                'quiet_noise_limit': 2,         # noise limit for quiet homes
                'child_safety_threshold': 0.8   # minimum child-safety score
            },
            semantic_model_config={
                'model_name': 'all-MiniLM-L6-v2',
                'fallback_models': ['all-mpnet-base-v2', 'all-MiniLM-L12-v2'],
                'similarity_threshold': 0.5,
                'cache_embeddings': True
            },
            data_imputation_rules={
                'noise_level_defaults': {
                    'terrier': 'high',
                    'hound': 'high',
                    'herding': 'moderate',
                    'toy': 'moderate',
                    'working': 'moderate',
                    'sporting': 'moderate',
                    'non_sporting': 'low',
                    'unknown': 'moderate'
                },
                'exercise_level_defaults': {
                    'working': 'high',
                    'sporting': 'high',
                    'herding': 'high',
                    'terrier': 'moderate',
                    'hound': 'moderate',
                    'toy': 'low',
                    'non_sporting': 'moderate',
                    'unknown': 'moderate'
                }
            },
            debug_mode=False,
            version="1.0.0"
        )

    def _initialize_breed_standardization(self):
        """Standardise every known breed and build the alias lookup table."""
        try:
            breeds = self._get_all_breeds()

            for breed in breeds:
                standardized = self._standardize_breed_data(breed)
                self.breed_standardization[breed] = standardized

                # Register aliases; a later breed overwrites a colliding alias.
                for alias in standardized.aliases:
                    self.breed_aliases[alias.lower()] = breed

            print(f"Initialized standardization for {len(self.breed_standardization)} breeds")

        except Exception as e:
            print(f"Error initializing breed standardization: {str(e)}")
            print(traceback.format_exc())

    def _get_all_breeds(self) -> List[str]:
        """Return the distinct breed names stored in ``animal_detector.db``.

        Returns an empty list (after logging) when the database cannot be read.
        """
        try:
            conn = sqlite3.connect('animal_detector.db')
            try:
                cursor = conn.cursor()
                try:
                    cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog")
                    return [row[0] for row in cursor.fetchall()]
                finally:
                    cursor.close()
            finally:
                # Always release the connection, including on query failure.
                conn.close()
        except Exception as e:
            print(f"Error getting breed list: {str(e)}")
            return []

    def _standardize_breed_data(self, breed: str) -> "BreedStandardization":
        """Build the standardised record for one breed.

        Falls back to a record holding only names and aliases when any of the
        lookups or conversions fails.
        """
        try:
            # Raw source data
            breed_info = get_dog_description(breed) or {}
            health_info = breed_health_info.get(breed, {})
            noise_info = breed_noise_info.get(breed, {})

            # Names
            canonical_name = breed
            display_name = breed.replace('_', ' ')
            aliases = self._generate_breed_aliases(breed)

            # Categorical values converted to numeric levels
            size_category = self._standardize_size(breed_info.get('Size', ''))
            exercise_level = self._standardize_exercise_needs(breed_info.get('Exercise Needs', ''))
            noise_level = self._standardize_noise_level(noise_info.get('noise_level', ''))
            care_complexity = self._standardize_care_level(breed_info.get('Care Level', ''))
            child_compatibility = self._standardize_child_compatibility(
                breed_info.get('Good with Children', '')
            )

            # Data quality assessment
            data_quality_scores = self._assess_data_quality(breed_info, health_info, noise_info)
            confidence_flags = self._calculate_confidence_flags(breed_info, health_info, noise_info)

            return BreedStandardization(
                canonical_name=canonical_name,
                display_name=display_name,
                aliases=aliases,
                size_category=size_category,
                exercise_level=exercise_level,
                noise_level=noise_level,
                care_complexity=care_complexity,
                child_compatibility=child_compatibility,
                data_quality_scores=data_quality_scores,
                confidence_flags=confidence_flags
            )

        except Exception as e:
            print(f"Error standardizing breed {breed}: {str(e)}")
            return BreedStandardization(
                canonical_name=breed,
                display_name=breed.replace('_', ' '),
                aliases=self._generate_breed_aliases(breed)
            )

    def _generate_breed_aliases(self, breed: str) -> List[str]:
        """Return the de-duplicated aliases of ``breed`` in a stable order."""
        display_name = breed.replace('_', ' ')

        # Basic aliases: spaced and underscored lower-case names
        aliases = [display_name.lower(), breed.lower()]

        # Common abbreviations and variants
        breed_aliases_map = {
            'German_Shepherd': ['gsd', 'german shepherd dog', 'alsatian'],
            'Labrador_Retriever': ['lab', 'labrador', 'retriever'],
            'Golden_Retriever': ['golden', 'goldie'],
            'Border_Collie': ['border', 'collie'],
            'Yorkshire_Terrier': ['yorkie', 'york', 'yorkshire'],
            'French_Bulldog': ['frenchie', 'french bull', 'bouledogue français'],
            'Boston_Terrier': ['boston bull', 'american gentleman'],
            'Cavalier_King_Charles_Spaniel': ['cavalier', 'ckcs', 'king charles'],
            'American_Staffordshire_Terrier': ['amstaff', 'american staff'],
            'Jack_Russell_Terrier': ['jrt', 'jack russell', 'parson russell'],
            'Shih_Tzu': ['shih tzu', 'lion dog'],
            'Bichon_Frise': ['bichon', 'powder puff'],
            'Cocker_Spaniel': ['cocker', 'english cocker', 'american cocker']
        }
        aliases.extend(breed_aliases_map.get(breed, []))

        # dict.fromkeys removes duplicates while keeping insertion order, so
        # the result is deterministic (a plain set() is not).
        return list(dict.fromkeys(aliases))

    @staticmethod
    def _match_level(text: Any, mapping: Dict[str, int], default: int) -> int:
        """Return the level of the first keyword of ``mapping`` found in ``text``.

        ``None`` and non-string values are treated as text via ``str``; an
        empty or unmatched text yields ``default``.
        """
        lowered = str(text or '').lower()
        # Stable sort that only moves multi-word keywords to the front, so
        # 'very high' / 'extra large' are tested before the shorter keywords
        # they contain ('high' / 'large'); all other keywords keep their order.
        ordered = sorted(mapping.items(), key=lambda item: ' ' not in item[0])
        for keyword, level in ordered:
            if keyword in lowered:
                return level
        return default

    def _standardize_size(self, size_str: str) -> int:
        """Map a size description to 1 (tiny) .. 5 (giant); default 3 (medium)."""
        size_mapping = {
            'tiny': 1, 'toy': 1,
            'small': 2, 'little': 2, 'compact': 2,
            'medium': 3, 'moderate': 3, 'average': 3,
            'large': 4, 'big': 4,
            'giant': 5, 'huge': 5, 'extra large': 5
        }
        return self._match_level(size_str, size_mapping, 3)

    def _standardize_exercise_needs(self, exercise_str: str) -> int:
        """Map an exercise description to 1 (low) .. 4 (very high); default 2."""
        exercise_mapping = {
            'low': 1, 'minimal': 1, 'light': 1,
            'moderate': 2, 'average': 2, 'medium': 2, 'regular': 2,
            'high': 3, 'active': 3, 'vigorous': 3,
            'very high': 4, 'extreme': 4, 'intense': 4
        }
        return self._match_level(exercise_str, exercise_mapping, 2)

    def _standardize_noise_level(self, noise_str: str) -> int:
        """Map a noise description to 1 (low) .. 3 (high); default 2 (moderate)."""
        noise_mapping = {
            'low': 1, 'quiet': 1, 'silent': 1, 'minimal': 1,
            'moderate': 2, 'average': 2, 'medium': 2, 'occasional': 2,
            'high': 3, 'loud': 3, 'vocal': 3, 'frequent': 3
        }
        return self._match_level(noise_str, noise_mapping, 2)

    def _standardize_care_level(self, care_str: str) -> int:
        """Map a care description to 1 (low) .. 3 (high); default 2 (moderate)."""
        care_mapping = {
            'low': 1, 'easy': 1, 'simple': 1, 'minimal': 1,
            'moderate': 2, 'average': 2, 'medium': 2, 'regular': 2,
            'high': 3, 'complex': 3, 'intensive': 3, 'demanding': 3
        }
        return self._match_level(care_str, care_mapping, 2)

    def _standardize_child_compatibility(self, child_str: str) -> float:
        """Map 'yes' / 'no' to 1.0 / 0.0; anything else (or None) to 0.5."""
        answer = str(child_str or '').strip().lower()
        if answer == 'yes':
            return 1.0
        if answer == 'no':
            return 0.0
        return 0.5  # unknown or uncertain

    def _assess_data_quality(self, breed_info: Dict, health_info: Dict,
                             noise_info: Dict) -> Dict[str, "DataQuality"]:
        """Grade the completeness of the basic, health and noise data."""
        quality_scores = {}

        # Basic information quality: count the populated required fields
        if breed_info:
            required_fields = ['Size', 'Exercise Needs', 'Temperament', 'Good with Children']
            complete_fields = sum(1 for name in required_fields if breed_info.get(name))

            if complete_fields >= 4:
                quality_scores['basic_info'] = DataQuality.HIGH
            elif complete_fields >= 2:
                quality_scores['basic_info'] = DataQuality.MEDIUM
            else:
                quality_scores['basic_info'] = DataQuality.LOW
        else:
            quality_scores['basic_info'] = DataQuality.UNKNOWN

        # Health information quality
        if health_info and health_info.get('health_notes'):
            quality_scores['health_info'] = DataQuality.HIGH
        elif health_info:
            quality_scores['health_info'] = DataQuality.MEDIUM
        else:
            quality_scores['health_info'] = DataQuality.UNKNOWN

        # Noise information quality
        if noise_info and noise_info.get('noise_level'):
            quality_scores['noise_info'] = DataQuality.HIGH
        else:
            quality_scores['noise_info'] = DataQuality.LOW

        return quality_scores

    def _calculate_confidence_flags(self, breed_info: Dict, health_info: Dict,
                                    noise_info: Dict) -> Dict[str, float]:
        """Return per-category confidence values plus their mean as 'overall'."""
        confidence_flags = {}

        # Basic information confidence (bonus when a description exists)
        basic_confidence = 0.8 if breed_info else 0.2
        if breed_info and breed_info.get('Description'):
            basic_confidence += 0.1
        confidence_flags['basic_info'] = min(1.0, basic_confidence)

        # Health information confidence
        confidence_flags['health_info'] = 0.7 if health_info else 0.3

        # Noise information confidence
        confidence_flags['noise_info'] = 0.8 if noise_info else 0.4

        # Overall confidence: mean of the three categories, as a plain float
        confidence_flags['overall'] = float(np.mean(list(confidence_flags.values())))

        return confidence_flags

    def get_standardized_breed_data(self, breed: str) -> Optional["BreedStandardization"]:
        """Look a breed up by exact name, then alias, then substring match.

        Returns:
            The standardised record, or ``None`` when nothing matches or when
            ``breed`` is empty / not a string.
        """
        # An empty string is a substring of every alias, so it would otherwise
        # "fuzzy match" an arbitrary breed.
        if not isinstance(breed, str) or not breed.strip():
            return None

        # Exact match
        if breed in self.breed_standardization:
            return self.breed_standardization[breed]

        # Alias match
        breed_lower = breed.strip().lower()
        if breed_lower in self.breed_aliases:
            canonical_breed = self.breed_aliases[breed_lower]
            return self.breed_standardization.get(canonical_breed)

        # Fuzzy (substring) match, first alias in insertion order wins
        for alias, canonical_breed in self.breed_aliases.items():
            if breed_lower in alias or alias in breed_lower:
                return self.breed_standardization.get(canonical_breed)

        return None

    @staticmethod
    def _scale_confidence(flags: Dict[str, float], key: str, factor: float) -> None:
        """Multiply ``flags[key]`` by ``factor`` when that flag exists."""
        if key in flags:
            flags[key] *= factor

    def apply_data_imputation(self, breed: str) -> "BreedStandardization":
        """Return a copy of the breed record with group defaults filled in.

        A level of 2 (moderate) may be a placeholder default, so it is replaced
        by the default of the breed's group and the matching confidence flag is
        lowered. The cached record is never modified, so repeated calls return
        the same values instead of compounding the confidence penalty.
        """
        import copy  # local import: only needed by this method

        try:
            standardized = self.get_standardized_breed_data(breed)
            if not standardized:
                return BreedStandardization(canonical_name=breed, display_name=breed.replace('_', ' '))

            # Work on a copy with its own confidence dict.
            result = copy.copy(standardized)
            result.confidence_flags = dict(standardized.confidence_flags)

            imputation_rules = self.configuration.data_imputation_rules
            breed_group = self._determine_breed_group(breed)

            # Noise level imputation
            if result.noise_level == 2:  # moderate (possibly a default value)
                noise_defaults = imputation_rules.get('noise_level_defaults', {})
                if breed_group in noise_defaults:
                    result.noise_level = self._standardize_noise_level(noise_defaults[breed_group])
                    self._scale_confidence(result.confidence_flags, 'noise_info', 0.7)

            # Exercise level imputation
            if result.exercise_level == 2:  # moderate (possibly a default value)
                exercise_defaults = imputation_rules.get('exercise_level_defaults', {})
                if breed_group in exercise_defaults:
                    result.exercise_level = self._standardize_exercise_needs(exercise_defaults[breed_group])
                    self._scale_confidence(result.confidence_flags, 'basic_info', 0.8)

            return result

        except Exception as e:
            print(f"Error applying data imputation for {breed}: {str(e)}")
            return self.get_standardized_breed_data(breed) or BreedStandardization(
                canonical_name=breed, display_name=breed.replace('_', ' ')
            )

    def _determine_breed_group(self, breed: str) -> str:
        """Guess the breed group from keywords in the breed name."""
        breed_lower = breed.lower()

        # Checked in order; the first group with a matching keyword wins.
        group_keywords = (
            ('terrier', ('terrier',)),
            ('hound', ('hound',)),
            ('herding', ('shepherd', 'collie', 'cattle', 'sheepdog')),
            ('sporting', ('retriever', 'pointer', 'setter', 'spaniel')),
            ('working', ('mastiff', 'great', 'rottweiler', 'akita')),
            ('toy', ('toy', 'pug', 'chihuahua', 'papillon')),
        )
        for group, keywords in group_keywords:
            if any(word in breed_lower for word in keywords):
                return group
        return 'unknown'

    def _load_configuration(self):
        """Merge the JSON configuration file into the current configuration."""
        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                config_data = json.load(f)

            # Dictionary sections are merged key by key (top level only).
            for section in ('scoring_weights', 'calibration_settings',
                            'constraint_thresholds', 'semantic_model_config',
                            'data_imputation_rules'):
                if section in config_data:
                    getattr(self.configuration, section).update(config_data[section])
            if 'debug_mode' in config_data:
                self.configuration.debug_mode = config_data['debug_mode']

            print(f"Configuration loaded from {self.config_file}")

        except Exception as e:
            print(f"Error loading configuration: {str(e)}")

    def save_configuration(self):
        """Write the current configuration to the configuration file as JSON."""
        try:
            config_data = asdict(self.configuration)

            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(config_data, f, indent=2, ensure_ascii=False)

            print(f"Configuration saved to {self.config_file}")

        except Exception as e:
            print(f"Error saving configuration: {str(e)}")

    def get_configuration(self) -> ConfigurationSettings:
        """Return the current configuration object."""
        return self.configuration

    def update_configuration(self, updates: Dict[str, Any]):
        """Replace configuration attributes named in ``updates``.

        Keys that are not attributes of the configuration are ignored and
        reported separately.
        """
        try:
            applied = []
            ignored = []
            for key, value in updates.items():
                if hasattr(self.configuration, key):
                    setattr(self.configuration, key, value)
                    applied.append(key)
                else:
                    ignored.append(key)

            print(f"Configuration updated: {applied}")
            if ignored:
                print(f"Configuration keys ignored (unknown): {ignored}")

        except Exception as e:
            print(f"Error updating configuration: {str(e)}")

    def get_breed_mapping_summary(self) -> Dict[str, Any]:
        """Summarise breed/alias counts, quality distribution and confidence.

        Returns ``{'error': message}`` when the summary cannot be built.
        """
        try:
            total_breeds = len(self.breed_standardization)
            total_aliases = len(self.breed_aliases)

            # Count of breeds per quality grade, for each data category
            quality_distribution = {}
            for breed_data in self.breed_standardization.values():
                for category, quality in breed_data.data_quality_scores.items():
                    counts = quality_distribution.setdefault(category, {})
                    counts[quality.value] = counts.get(quality.value, 0) + 1

            # Mean confidence per category
            confidence_stats = {}
            for breed_data in self.breed_standardization.values():
                for category, confidence in breed_data.confidence_flags.items():
                    confidence_stats.setdefault(category, []).append(confidence)

            confidence_averages = {
                category: float(np.mean(values)) for category, values in confidence_stats.items()
            }

            return {
                'total_breeds': total_breeds,
                'total_aliases': total_aliases,
                'quality_distribution': quality_distribution,
                'confidence_averages': confidence_averages,
                'configuration_version': self.configuration.version
            }

        except Exception as e:
            print(f"Error generating breed mapping summary: {str(e)}")
            return {'error': str(e)}


_config_manager = None


def get_config_manager() -> ConfigManager:
    """Return the process-wide ConfigManager, creating it on first use."""
    global _config_manager
    if _config_manager is None:
        _config_manager = ConfigManager()
    return _config_manager


def get_standardized_breed_data(breed: str) -> Optional["BreedStandardization"]:
    """Return the standardised record of ``breed`` from the global manager."""
    manager = get_config_manager()
    return manager.get_standardized_breed_data(breed)


def get_breed_with_imputation(breed: str) -> "BreedStandardization":
    """Return the breed record after the imputation rules have been applied."""
manager = get_config_manager() + return manager.apply_data_imputation(breed) + +def get_system_configuration() -> ConfigurationSettings: + """系統配置""" + manager = get_config_manager() + return manager.get_configuration() diff --git a/constraint_manager.py b/constraint_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..d752cb38856f44ea9bca40071913dda07d9854be --- /dev/null +++ b/constraint_manager.py @@ -0,0 +1,852 @@ +import sqlite3 +import json +import numpy as np +from typing import List, Dict, Tuple, Set, Optional, Any +from dataclasses import dataclass, field +from enum import Enum +import traceback +from dog_database import get_dog_description +from dynamic_scoring_config import get_scoring_config +from breed_health_info import breed_health_info +from breed_noise_info import breed_noise_info +from query_understanding import QueryDimensions + +class ConstraintPriority(Enum): + """Constraint priority definitions""" + CRITICAL = 1 # Critical constraints (safety, space) + HIGH = 2 # High priority (activity level, noise) + MODERATE = 3 # Moderate priority (maintenance, experience) + FLEXIBLE = 4 # Flexible constraints (other preferences) + +@dataclass +class ConstraintRule: + """Constraint rule structure""" + name: str + priority: ConstraintPriority + description: str + filter_function: str # Function name + relaxation_allowed: bool = True + safety_critical: bool = False + +@dataclass +class FilterResult: + """Filter result structure""" + passed_breeds: Set[str] + filtered_breeds: Dict[str, str] # breed -> reason + applied_constraints: List[str] + relaxed_constraints: List[str] = field(default_factory=list) + warnings: List[str] = field(default_factory=list) + +class ConstraintManager: + """ + Hierarchical constraint management system + Implements priority-based constraint filtering with progressive constraint relaxation + """ + + def __init__(self): + """Initialize constraint manager""" + self.breed_list = self._load_breed_list() + 
self.breed_cache = {} # Breed information cache + self.constraint_rules = self._initialize_constraint_rules() + self._warm_cache() + + def _load_breed_list(self) -> List[str]: + """Load breed list from database""" + try: + conn = sqlite3.connect('animal_detector.db') + cursor = conn.cursor() + cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog") + breeds = [row[0] for row in cursor.fetchall()] + cursor.close() + conn.close() + return breeds + except Exception as e: + print(f"Error loading breed list: {str(e)}") + return ['Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', + 'Bulldog', 'Poodle', 'Beagle', 'Border_Collie', 'Yorkshire_Terrier'] + + def _warm_cache(self): + """Warm up breed information cache""" + for breed in self.breed_list: + self.breed_cache[breed] = self._get_breed_info(breed) + + def _get_breed_info(self, breed: str) -> Dict[str, Any]: + """Get comprehensive breed information""" + if breed in self.breed_cache: + return self.breed_cache[breed] + + try: + # Basic breed information + breed_info = get_dog_description(breed) or {} + + # Health information + health_info = breed_health_info.get(breed, {}) + + # Noise information + noise_info = breed_noise_info.get(breed, {}) + + # Combine all information + combined_info = { + 'breed_name': breed, + 'display_name': breed.replace('_', ' '), + 'size': breed_info.get('Size', '').lower(), + 'exercise_needs': breed_info.get('Exercise Needs', '').lower(), + 'grooming_needs': breed_info.get('Grooming Needs', '').lower(), + 'temperament': breed_info.get('Temperament', '').lower(), + 'good_with_children': breed_info.get('Good with Children', 'Yes'), + 'care_level': breed_info.get('Care Level', '').lower(), + 'lifespan': breed_info.get('Lifespan', '10-12 years'), + 'noise_level': noise_info.get('noise_level', 'moderate').lower(), + 'health_issues': health_info.get('health_notes', ''), + 'raw_breed_info': breed_info, + 'raw_health_info': health_info, + 'raw_noise_info': noise_info + } + + 
self.breed_cache[breed] = combined_info + return combined_info + + except Exception as e: + print(f"Error getting breed info for {breed}: {str(e)}") + return {'breed_name': breed, 'display_name': breed.replace('_', ' ')} + + def _initialize_constraint_rules(self) -> List[ConstraintRule]: + """Initialize constraint rules""" + return [ + # Priority 1: Critical constraints (cannot be violated) + ConstraintRule( + name="apartment_size_constraint", + priority=ConstraintPriority.CRITICAL, + description="Apartment living space size restrictions", + filter_function="filter_apartment_size", + relaxation_allowed=False, + safety_critical=True + ), + ConstraintRule( + name="child_safety_constraint", + priority=ConstraintPriority.CRITICAL, + description="Child safety compatibility", + filter_function="filter_child_safety", + relaxation_allowed=False, + safety_critical=True + ), + ConstraintRule( + name="severe_allergy_constraint", + priority=ConstraintPriority.CRITICAL, + description="Severe allergy restrictions", + filter_function="filter_severe_allergies", + relaxation_allowed=False, + safety_critical=True + ), + + # Priority 2: High priority constraints + ConstraintRule( + name="exercise_constraint", + priority=ConstraintPriority.HIGH, + description="Exercise requirement mismatch", + filter_function="filter_exercise_mismatch", + relaxation_allowed=False, + safety_critical=False + ), + ConstraintRule( + name="size_bias_correction", + priority=ConstraintPriority.MODERATE, + description="Correct size bias in moderate lifestyle matches", + filter_function="filter_size_bias", + relaxation_allowed=True, + safety_critical=False + ), + ConstraintRule( + name="low_activity_constraint", + priority=ConstraintPriority.HIGH, + description="Low activity level restrictions", + filter_function="filter_low_activity", + relaxation_allowed=True + ), + ConstraintRule( + name="quiet_requirement_constraint", + priority=ConstraintPriority.HIGH, + description="Quiet environment requirements", + 
filter_function="filter_quiet_requirements", + relaxation_allowed=True + ), + ConstraintRule( + name="space_compatibility_constraint", + priority=ConstraintPriority.HIGH, + description="Living space compatibility", + filter_function="filter_space_compatibility", + relaxation_allowed=True + ), + + # Priority 3: Moderate constraints + ConstraintRule( + name="grooming_preference_constraint", + priority=ConstraintPriority.MODERATE, + description="Grooming maintenance preferences", + filter_function="filter_grooming_preferences", + relaxation_allowed=True + ), + ConstraintRule( + name="experience_level_constraint", + priority=ConstraintPriority.MODERATE, + description="Ownership experience requirements", + filter_function="filter_experience_level", + relaxation_allowed=True + ), + + # Priority 4: Flexible constraints + ConstraintRule( + name="size_preference_constraint", + priority=ConstraintPriority.FLEXIBLE, + description="Size preferences", + filter_function="filter_size_preferences", + relaxation_allowed=True + ) + ] + + def apply_constraints(self, dimensions: QueryDimensions, + min_candidates: int = 12) -> FilterResult: + """ + Apply constraint filtering + + Args: + dimensions: Query dimensions + min_candidates: Minimum number of candidate breeds + + Returns: + FilterResult: Filtering results + """ + try: + # Start with all breeds + candidates = set(self.breed_list) + filtered_breeds = {} + applied_constraints = [] + relaxed_constraints = [] + warnings = [] + + # Apply constraints in priority order + for priority in [ConstraintPriority.CRITICAL, ConstraintPriority.HIGH, + ConstraintPriority.MODERATE, ConstraintPriority.FLEXIBLE]: + + # Get constraint rules for this priority level + priority_rules = [rule for rule in self.constraint_rules + if rule.priority == priority] + + for rule in priority_rules: + # Check if this constraint should be applied + if self._should_apply_constraint(rule, dimensions): + # Apply constraint + before_count = len(candidates) + 
filter_func = getattr(self, rule.filter_function) + new_filtered = filter_func(candidates, dimensions) + + # Update candidate list + candidates -= set(new_filtered.keys()) + filtered_breeds.update(new_filtered) + applied_constraints.append(rule.name) + + print(f"Applied {rule.name}: {before_count} -> {len(candidates)} candidates") + + # Check if constraint relaxation is needed + if (len(candidates) < min_candidates and + rule.relaxation_allowed and not rule.safety_critical): + + # Constraint relaxation + # candidates.update(new_filtered.keys()) + relaxed_constraints.append(rule.name) + warnings.append(f"Relaxed {rule.description} to maintain diversity") + + print(f"Relaxed {rule.name}: restored to {len(candidates)} candidates") + + # If too few candidates after critical constraints, warn but don't relax + if (priority == ConstraintPriority.CRITICAL and + len(candidates) < min_candidates): + warnings.append(f"Critical constraints resulted in only {len(candidates)} candidates") + + # Final safety net: ensure at least some candidate breeds + if len(candidates) == 0: + warnings.append("All breeds filtered out, returning top safe breeds") + candidates = self._get_emergency_candidates() + + return FilterResult( + passed_breeds=candidates, + filtered_breeds=filtered_breeds, + applied_constraints=applied_constraints, + relaxed_constraints=relaxed_constraints, + warnings=warnings + ) + + except Exception as e: + print(f"Error applying constraints: {str(e)}") + print(traceback.format_exc()) + return FilterResult( + passed_breeds=set(self.breed_list[:min_candidates]), + filtered_breeds={}, + applied_constraints=[], + warnings=[f"Constraint application failed: {str(e)}"] + ) + + def _should_apply_constraint(self, rule: ConstraintRule, + dimensions: QueryDimensions) -> bool: + """Enhanced constraint application logic""" + + # Always apply size constraints when space is mentioned + if rule.name == "apartment_size_constraint": + return any(term in dimensions.spatial_constraints + 
for term in ['apartment', 'small', 'studio', 'condo']) + + # Apply exercise constraints when activity level is specified + if rule.name == "exercise_constraint": + return len(dimensions.activity_level) > 0 or \ + any(term in str(dimensions.spatial_constraints) + for term in ['apartment', 'small']) + + # Child safety constraint + if rule.name == "child_safety_constraint": + return 'children' in dimensions.family_context + + # Severe allergy constraint + if rule.name == "severe_allergy_constraint": + return 'hypoallergenic' in dimensions.special_requirements + + # Low activity constraint + if rule.name == "low_activity_constraint": + return 'low' in dimensions.activity_level + + # Quiet requirement constraint + if rule.name == "quiet_requirement_constraint": + return 'low' in dimensions.noise_preferences + + # Space compatibility constraint + if rule.name == "space_compatibility_constraint": + return ('apartment' in dimensions.spatial_constraints or + 'house' in dimensions.spatial_constraints) + + # Grooming preference constraint + if rule.name == "grooming_preference_constraint": + return len(dimensions.maintenance_level) > 0 + + # Experience level constraint + if rule.name == "experience_level_constraint": + return 'first_time' in dimensions.special_requirements + + # Size preference constraint + if rule.name == "size_preference_constraint": + return len(dimensions.size_preferences) > 0 + + return False + + def filter_apartment_size(self, candidates: Set[str], + dimensions: QueryDimensions) -> Dict[str, str]: + """Enhanced apartment size filtering with strict enforcement""" + filtered = {} + + # Extract living space type with better pattern matching + living_space = self._extract_living_space(dimensions) + space_requirements = self._get_space_requirements(living_space) + + for breed in list(candidates): + breed_info = self.breed_cache.get(breed, {}) + breed_size = self._normalize_breed_size(breed_info.get('size', 'Medium')) + exercise_needs = 
self._normalize_exercise_level(breed_info.get('exercise_needs', 'Moderate')) + + # Dynamic space compatibility check + compatibility_score = self._calculate_space_compatibility( + breed_size, exercise_needs, space_requirements + ) + + # Apply threshold-based filtering + if compatibility_score < 0.3: # Strict threshold for poor matches + reason = self._generate_filter_reason(breed_size, exercise_needs, living_space) + filtered[breed] = reason + continue + + return filtered + + def _extract_living_space(self, dimensions: QueryDimensions) -> str: + """Extract living space type from dimensions""" + spatial_text = ' '.join(dimensions.spatial_constraints).lower() + + if any(term in spatial_text for term in ['apartment', 'small apartment', 'studio', 'condo']): + return 'apartment' + elif any(term in spatial_text for term in ['small house', 'townhouse']): + return 'small_house' + elif any(term in spatial_text for term in ['medium house', 'medium-sized']): + return 'medium_house' + elif any(term in spatial_text for term in ['large house', 'big house']): + return 'large_house' + else: + return 'medium_house' # Default assumption + + def _get_space_requirements(self, living_space: str) -> Dict[str, float]: + """Get space requirements for different living situations""" + requirements = { + 'apartment': {'min_space': 1.0, 'yard_bonus': 0.0, 'exercise_penalty': 1.5}, + 'small_house': {'min_space': 1.5, 'yard_bonus': 0.2, 'exercise_penalty': 1.2}, + 'medium_house': {'min_space': 2.0, 'yard_bonus': 0.3, 'exercise_penalty': 1.0}, + 'large_house': {'min_space': 3.0, 'yard_bonus': 0.5, 'exercise_penalty': 0.8} + } + return requirements.get(living_space, requirements['medium_house']) + + def _normalize_breed_size(self, size: str) -> str: + """Normalize breed size to standard categories""" + size_lower = size.lower() + if any(term in size_lower for term in ['toy', 'tiny']): + return 'toy' + elif 'small' in size_lower: + return 'small' + elif 'medium' in size_lower: + return 'medium' + 
elif 'large' in size_lower: + return 'large' + elif any(term in size_lower for term in ['giant', 'extra large']): + return 'giant' + else: + return 'medium' # Default + + def _normalize_exercise_level(self, exercise: str) -> str: + """Normalize exercise level to standard categories""" + exercise_lower = exercise.lower() + if any(term in exercise_lower for term in ['very high', 'extreme', 'intense']): + return 'very_high' + elif 'high' in exercise_lower: + return 'high' + elif 'moderate' in exercise_lower: + return 'moderate' + elif any(term in exercise_lower for term in ['low', 'minimal']): + return 'low' + else: + return 'moderate' # Default + + def _calculate_space_compatibility(self, breed_size: str, exercise_level: str, space_req: Dict[str, float]) -> float: + """Calculate dynamic space compatibility score""" + # Size-space compatibility matrix (dynamic, not hardcoded) + size_factors = { + 'toy': 0.5, 'small': 1.0, 'medium': 1.5, 'large': 2.5, 'giant': 4.0 + } + + exercise_factors = { + 'low': 1.0, 'moderate': 1.3, 'high': 1.8, 'very_high': 2.5 + } + + breed_space_need = size_factors[breed_size] * exercise_factors[exercise_level] + available_space = space_req['min_space'] + + # Calculate compatibility ratio + compatibility = available_space / breed_space_need + + # Apply exercise penalty for high-energy breeds in small spaces + if exercise_level in ['high', 'very_high'] and available_space < 2.0: + compatibility *= (1.0 - space_req['exercise_penalty'] * 0.3) + + return max(0.0, min(1.0, compatibility)) + + def _generate_filter_reason(self, breed_size: str, exercise_level: str, living_space: str) -> str: + """Generate dynamic filtering reason""" + if breed_size in ['giant', 'large'] and living_space == 'apartment': + return f"{breed_size.title()} breed not suitable for apartment living" + elif exercise_level in ['high', 'very_high'] and living_space in ['apartment', 'small_house']: + return f"High-energy breed needs more space than {living_space.replace('_', ' 
def filter_low_activity(self, candidates: Set[str],
                        dimensions: QueryDimensions) -> Dict[str, str]:
    """Filter out breeds unsuitable for a low-activity owner.

    A breed is rejected when its ``exercise_needs`` mentions "high"
    (which also covers "very high"), or when its ``temperament``
    mentions a working/sporting/herding/energetic trait. Matching is
    case-insensitive so capitalized values such as ``'High'`` are
    recognized.

    Args:
        candidates: Breed names still under consideration.
        dimensions: Parsed query dimensions (not read by this filter).

    Returns:
        Mapping of rejected breed name to a human-readable reason.
        Breeds missing from ``self.breed_cache`` are never rejected.
    """
    filtered = {}
    energetic_traits = ('working', 'sporting', 'herding', 'energetic')

    for breed in candidates:
        breed_info = self.breed_cache.get(breed, {})
        # `or ''` also guards against keys stored with a None value.
        exercise_needs = str(breed_info.get('exercise_needs') or '').lower()
        temperament = str(breed_info.get('temperament') or '').lower()

        # High exercise requirement breeds ('very high' contains 'high')
        if 'high' in exercise_needs:
            filtered[breed] = "High exercise requirements unsuitable for low activity lifestyle"
        # Working, sporting and herding dogs typically need substantial exercise
        elif any(trait in temperament for trait in energetic_traits):
            filtered[breed] = "High-energy breed requiring substantial daily exercise"

    return filtered
def filter_size_preferences(self, candidates: Set[str],
                            dimensions: QueryDimensions) -> Dict[str, str]:
    """Filter breeds that clearly contradict an explicit size preference.

    This is a soft constraint: nothing is filtered unless the user
    named exactly one size, and even then only the opposite extreme is
    rejected ("small" rejects large breeds, "large" rejects small
    breeds). Sizes are compared case-insensitively so that data values
    such as ``'Large'`` match a ``'large'`` preference.

    Args:
        candidates: Breed names still under consideration.
        dimensions: Parsed query dimensions; ``size_preferences`` is read.

    Returns:
        Mapping of rejected breed name to a human-readable reason.
    """
    filtered = {}

    size_preferences = dimensions.size_preferences
    # Several (or no) preferences are handled by scoring, not filtering.
    if not size_preferences or len(size_preferences) != 1:
        return filtered

    preferred = size_preferences[0]
    preferred_lower = str(preferred).lower()

    for breed in candidates:
        breed_info = self.breed_cache.get(breed, {})
        breed_size = breed_info.get('size', '')
        breed_size_lower = str(breed_size or '').lower()

        if preferred_lower in breed_size_lower:
            continue  # Breed matches the preference

        # Only the opposite extreme counts as a hard mismatch.
        if ((preferred_lower == 'small' and 'large' in breed_size_lower) or
                (preferred_lower == 'large' and 'small' in breed_size_lower)):
            filtered[breed] = f"Size mismatch: prefer {preferred} but breed is {breed_size}"

    return filtered
spatial_text: + level = 'low_moderate' + else: + level = 'moderate' + + # Determine time commitment + if any(term in activity_text for term in ['30 minutes', 'half hour']): + time = 'limited' + elif any(term in activity_text for term in ['hiking', 'outdoor activities']): + time = 'extensive' + else: + time = 'moderate' + + return {'level': level, 'time': time} + + def _get_exercise_threshold(self, user_profile: Dict[str, str]) -> float: + """Get dynamic threshold based on user profile""" + base_threshold = 0.4 + + # Adjust threshold based on user constraints + if user_profile['level'] == 'low': + base_threshold = 0.6 # Stricter for low-activity users + elif user_profile['level'] == 'high': + base_threshold = 0.3 # More lenient for active users + + return base_threshold + + def _calculate_exercise_compatibility(self, user_profile: Dict[str, str], breed_level: str) -> float: + """Calculate dynamic exercise compatibility""" + # Exercise level compatibility matrix + compatibility_matrix = { + 'low': {'low': 1.0, 'moderate': 0.7, 'high': 0.3, 'very_high': 0.1}, + 'low_moderate': {'low': 0.9, 'moderate': 1.0, 'high': 0.5, 'very_high': 0.2}, + 'moderate': {'low': 0.8, 'moderate': 1.0, 'high': 0.8, 'very_high': 0.4}, + 'high': {'low': 0.5, 'moderate': 0.8, 'high': 1.0, 'very_high': 0.9} + } + + user_level = user_profile['level'] + base_compatibility = compatibility_matrix.get(user_level, {}).get(breed_level, 0.5) + + # Adjust for time commitment + if user_profile['time'] == 'limited' and breed_level in ['high', 'very_high']: + base_compatibility *= 0.7 + elif user_profile['time'] == 'extensive' and breed_level == 'low': + base_compatibility *= 0.8 + + return base_compatibility + + def _generate_exercise_filter_reason(self, user_profile: Dict[str, str], breed_level: str) -> str: + """Generate dynamic exercise filtering reason""" + user_level = user_profile['level'] + + if user_level == 'low' and breed_level in ['high', 'very_high']: + return f"High-energy breed unsuitable 
for low-activity lifestyle" + elif user_level == 'high' and breed_level == 'low': + return f"Low-energy breed may not match active lifestyle requirements" + else: + return f"Exercise requirements mismatch: {user_level} user with {breed_level} breed" + + def filter_size_bias(self, candidates: Set[str], dimensions: QueryDimensions) -> Dict[str, str]: + """Filter to correct size bias for moderate lifestyle users""" + filtered = {} + + # Detect moderate lifestyle indicators + activity_text = ' '.join(dimensions.activity_level).lower() + is_moderate_lifestyle = any(term in activity_text for term in + ['moderate', 'balanced', '30 minutes', 'medium-sized house']) + + if not is_moderate_lifestyle: + return filtered # No filtering needed + + # Count size distribution in candidates + size_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0} + total_candidates = len(candidates) + + for breed in candidates: + breed_info = self.breed_cache.get(breed, {}) + breed_size = self._normalize_breed_size(breed_info.get('size', 'Medium')) + size_counts[breed_size] += 1 + + # Check for size bias (too many large/giant breeds) + large_giant_ratio = (size_counts['large'] + size_counts['giant']) / max(total_candidates, 1) + + if large_giant_ratio > 0.6: # More than 60% large/giant breeds + # Filter some large/giant breeds to balance distribution + large_giant_filtered = 0 + target_reduction = int((large_giant_ratio - 0.4) * total_candidates) + + for breed in list(candidates): + if large_giant_filtered >= target_reduction: + break + + breed_info = self.breed_cache.get(breed, {}) + breed_size = self._normalize_breed_size(breed_info.get('size', 'Medium')) + + if breed_size in ['large', 'giant']: + # Check if breed has additional compatibility issues + exercise_level = self._normalize_exercise_level( + breed_info.get('exercise_needs', 'Moderate') + ) + + if breed_size == 'giant' or exercise_level == 'very_high': + filtered[breed] = f"Size bias correction: {breed_size} breed less 
def get_constraint_summary(self, filter_result: FilterResult) -> Dict[str, Any]:
    """Summarize the outcome of a constraint-filtering run.

    Args:
        filter_result: Result object exposing ``passed_breeds``,
            ``filtered_breeds``, ``applied_constraints``,
            ``relaxed_constraints`` and ``warnings``.

    Returns:
        Dictionary with breed counts, the applied/relaxed constraint
        names, warnings, the pass rate (passed / total breeds) and a
        per-category breakdown of filter reasons. The pass rate is 0.0
        when ``self.breed_list`` is empty, instead of dividing by zero.
    """
    total_breeds = len(self.breed_list)
    passed_count = len(filter_result.passed_breeds)

    return {
        'total_breeds': total_breeds,
        'passed_breeds': passed_count,
        'filtered_breeds': len(filter_result.filtered_breeds),
        'applied_constraints': filter_result.applied_constraints,
        'relaxed_constraints': filter_result.relaxed_constraints,
        'warnings': filter_result.warnings,
        # Guard against an empty breed database.
        'pass_rate': passed_count / total_breeds if total_breeds else 0.0,
        'filter_breakdown': self._get_filter_breakdown(filter_result)
    }
def calculate_space_score(self, size: str, living_space: str, has_yard: bool, exercise_needs: str) -> float:
    """Score how well a breed fits the user's living space.

    The score is the sum of a base value (breed size x home type x
    yard), an exercise adjustment (exercise needs x home type) and a
    yard bonus (breed size x exercise needs, only with a yard),
    followed by multipliers for a few extreme combinations.

    Args:
        size: Breed size ("Small", "Medium", "Large", "Giant");
            other values use the "Medium" row.
        living_space: "apartment", "house_small" or "house_large".
            Any other value raises ``KeyError``.
        has_yard: Whether a yard is available (uses the private-yard
            column; the shared-yard column is never selected here).
        exercise_needs: "Very High", "High", "Moderate" or "Low";
            other values use the "Moderate" entries.

    Returns:
        Score clamped to the range 0.05-1.0.
    """
    yard_columns = ("no_yard", "shared_yard", "private_yard")
    # size -> home type -> (no yard, shared yard, private yard)
    base_rows = {
        "Small": {"apartment": (0.85, 0.90, 0.95),
                  "house_small": (0.80, 0.85, 0.90),
                  "house_large": (0.75, 0.80, 0.85)},
        "Medium": {"apartment": (0.75, 0.85, 0.90),
                   "house_small": (0.80, 0.90, 0.90),
                   "house_large": (0.85, 0.90, 0.95)},
        "Large": {"apartment": (0.70, 0.80, 0.85),
                  "house_small": (0.75, 0.85, 0.90),
                  "house_large": (0.85, 0.90, 1.0)},
        "Giant": {"apartment": (0.65, 0.75, 0.80),
                  "house_small": (0.70, 0.80, 0.85),
                  "house_large": (0.80, 0.90, 1.0)},
    }
    base_matrix = {
        breed_size: {home: dict(zip(yard_columns, values)) for home, values in homes.items()}
        for breed_size, homes in base_rows.items()
    }

    # Effect of the breed's exercise needs on the space score, per home type.
    exercise_impact = {
        "Very High": {"apartment": -0.10, "house_small": -0.05, "house_large": 0},
        "High": {"apartment": -0.08, "house_small": -0.05, "house_large": 0},
        "Moderate": {"apartment": -0.05, "house_small": -0.02, "house_large": 0},
        "Low": {"apartment": 0.10, "house_small": 0.05, "house_large": 0},
    }

    # Benefit of having a yard, per breed size and exercise needs.
    yard_benefits = {
        "Giant": {"Very High": 0.25, "High": 0.20, "Moderate": 0.15, "Low": 0.10},
        "Large": {"Very High": 0.20, "High": 0.15, "Moderate": 0.10, "Low": 0.05},
        "Medium": {"Very High": 0.15, "High": 0.10, "Moderate": 0.08, "Low": 0.05},
        "Small": {"Very High": 0.10, "High": 0.08, "Moderate": 0.05, "Low": 0.03},
    }

    yard_key = "private_yard" if has_yard else "no_yard"
    base_score = base_matrix.get(size, base_matrix["Medium"])[living_space][yard_key]

    exercise_adj = exercise_impact.get(exercise_needs, exercise_impact["Moderate"])[living_space]

    if has_yard:
        per_size = yard_benefits.get(size, yard_benefits["Medium"])
        yard_bonus = per_size.get(exercise_needs, per_size["Moderate"])
    else:
        yard_bonus = 0

    # Combine all factors.
    score = base_score + exercise_adj + yard_bonus

    # Extreme cases: at most one multiplier applies, checked in this order.
    in_apartment = living_space == "apartment"
    if size == "Giant" and in_apartment:
        score = score * 0.85
    elif size == "Large" and in_apartment and exercise_needs == "Very High":
        score = score * 0.85
    elif size == "Small" and living_space == "house_large" and exercise_needs == "Low":
        # A low-energy small dog in a large house may have more room than it needs.
        score = score * 0.9

    # Keep the score within the valid range while still allowing extreme results.
    return max(0.05, min(1.0, score))
極端運動量的嚴格限制 + + Parameters: + breed_needs: 品種的運動需求等級 + exercise_time: 使用者計劃的運動時間(分鐘) + exercise_type: 運動類型(輕度/中度/高度) + + Returns: + float: 0.1到1.0之間的匹配分數 + """ + # 定義每個運動需求等級的具體參數 + exercise_levels = { + 'VERY HIGH': { + 'min': 120, # 最低需求 + 'ideal': 150, # 理想運動量 + 'max': 180, # 最大建議量 + 'type_weights': { # 不同運動類型的權重 + 'active_training': 1.0, + 'moderate_activity': 0.6, + 'light_walks': 0.3 + } + }, + 'HIGH': { + 'min': 90, + 'ideal': 120, + 'max': 150, + 'type_weights': { + 'active_training': 0.9, + 'moderate_activity': 0.8, + 'light_walks': 0.4 + } + }, + 'MODERATE': { + 'min': 45, + 'ideal': 60, + 'max': 90, + 'type_weights': { + 'active_training': 0.7, + 'moderate_activity': 1.0, + 'light_walks': 0.8 + } + }, + 'LOW': { + 'min': 15, + 'ideal': 30, + 'max': 45, + 'type_weights': { + 'active_training': 0.5, + 'moderate_activity': 0.8, + 'light_walks': 1.0 + } + } + } + + # 獲取品種的運動參數 + breed_level = exercise_levels.get(breed_needs.upper(), exercise_levels['MODERATE']) + + # 計算時間匹配度 + def calculate_time_score(): + """計算運動時間的匹配度,特別處理過度運動的情況""" + if exercise_time < breed_level['min']: + # 運動不足的嚴格懲罰 + deficit_ratio = exercise_time / breed_level['min'] + return max(0.1, deficit_ratio * 0.4) + + elif exercise_time <= breed_level['ideal']: + # 理想範圍內的漸進提升 + progress = (exercise_time - breed_level['min']) / (breed_level['ideal'] - breed_level['min']) + return 0.6 + (progress * 0.4) + + elif exercise_time <= breed_level['max']: + # 理想到最大範圍的平緩下降 + excess_ratio = (exercise_time - breed_level['ideal']) / (breed_level['max'] - breed_level['ideal']) + return 1.0 - (excess_ratio * 0.2) + + else: + # 過度運動的顯著懲罰 + excess = (exercise_time - breed_level['max']) / breed_level['max'] + # 低運動需求品種的過度運動懲罰更嚴重 + penalty_factor = 1.5 if breed_needs.upper() == 'LOW' else 1.0 + return max(0.1, 0.8 - (excess * 0.5 * penalty_factor)) + + # 計算運動類型匹配度 + def calculate_type_score(): + """評估運動類型的適合度,考慮品種特性""" + base_type_score = breed_level['type_weights'].get(exercise_type, 0.5) + + # 特殊情況處理 + if 
def calculate_grooming_score(self, breed_needs: str, user_commitment: str, breed_size: str,
                             breed_description: str = "") -> float:
    """Score how well a breed's grooming needs match the user's commitment.

    The score starts from a needs-vs-commitment base value, is reduced
    according to breed size (bigger dogs mean more grooming work), and
    is reduced further for coat traits found in ``breed_description``.

    Args:
        breed_needs: Grooming need level ("High", "Moderate", "Low");
            other values use the "Moderate" row.
        user_commitment: "low", "medium" or "high"; other values are
            treated as "medium" rather than raising ``KeyError``.
        breed_size: "Small", "Medium", "Large" or "Giant"; other
            values use the "Medium" row.
        breed_description: Optional free-text breed description. When
            it mentions long/double/curly coats, seasonal shedding or
            professional grooming, extra penalties apply. With the
            default empty string no coat penalty is applied, which is
            what every caller got before this parameter existed.

    Returns:
        Score clamped to the range 0.1-1.0.
    """
    # Base score: grooming need level x user commitment.
    base_scores = {
        "High": {"low": 0.20, "medium": 0.65, "high": 1.0},
        "Moderate": {"low": 0.45, "medium": 0.85, "high": 0.95},
        "Low": {"low": 0.90, "medium": 0.85, "high": 0.80},
    }

    # Larger breeds increase the grooming workload.
    size_adjustments = {
        "Giant": {"low": -0.20, "medium": -0.10, "high": -0.05},
        "Large": {"low": -0.15, "medium": -0.05, "high": 0},
        "Medium": {"low": -0.10, "medium": -0.05, "high": 0},
        "Small": {"low": -0.05, "medium": 0, "high": 0},
    }

    # Description keyword -> penalty per commitment level.
    coat_penalties = {
        'long coat': {'low': -0.20, 'medium': -0.15, 'high': -0.05},
        'double coat': {'low': -0.15, 'medium': -0.10, 'high': -0.05},
        'curly': {'low': -0.15, 'medium': -0.10, 'high': -0.05},
        'seasonal shedding': {'low': -0.15, 'medium': -0.10, 'high': -0.05},
        'professional grooming': {'low': -0.20, 'medium': -0.15, 'high': -0.05},
    }

    commitment = user_commitment if user_commitment in ("low", "medium", "high") else "medium"

    base_score = base_scores.get(breed_needs, base_scores["Moderate"])[commitment]
    size_adjustment = size_adjustments.get(breed_size, size_adjustments["Medium"])[commitment]
    current_score = base_score + size_adjustment

    description = (breed_description or "").lower()
    extra_adjustment = sum(
        penalties[commitment]
        for keyword, penalties in coat_penalties.items()
        if keyword in description
    )

    final_score = current_score + extra_adjustment

    # Keep the score in a meaningful range while allowing wide differences.
    return max(0.1, min(1.0, final_score))
def calculate_health_score(self, breed_name: str, health_sensitivity: str) -> float:
    """Score a breed's health outlook, weighted by the user's sensitivity.

    Starting from 1.0, the score is reduced for each known condition
    mentioned in the breed's health notes, reduced further when several
    severe or moderate conditions accumulate, scaled by the user's
    health sensitivity, adjusted for average lifespan, and boosted for
    notes that describe the breed as generally healthy.

    Args:
        breed_name: Key into the module-level ``breed_health_info``.
        health_sensitivity: "low", "medium" or "high"; other values
            apply no sensitivity scaling.

    Returns:
        Score clamped to the range 0.1-1.0, or 0.5 when the breed has
        no entry in ``breed_health_info``.
    """
    if breed_name not in breed_health_info:
        return 0.5

    breed_record = breed_health_info[breed_name]
    # A missing or None note is treated as "no known conditions".
    health_notes = str(breed_record.get('health_notes') or '').lower()

    # Severe conditions - heaviest penalties
    severe_conditions = {
        'hip dysplasia': -0.20,                # affects quality of life
        'heart disease': -0.15,                # needs long-term treatment
        'progressive retinal atrophy': -0.15,  # leads to blindness
        'bloat': -0.18,                        # life-threatening
        'epilepsy': -0.15,                     # needs long-term medication
        'degenerative myelopathy': -0.15,      # affects mobility
        'von willebrand disease': -0.12        # blood clotting disorder
    }

    # Moderate conditions
    moderate_conditions = {
        'allergies': -0.12,       # needs ongoing attention
        'eye problems': -0.15,    # may require surgery
        'joint problems': -0.15,  # affects exercise ability
        'hypothyroidism': -0.12,  # needs medication
        'ear infections': -0.10,  # needs regular cleaning
        'skin issues': -0.12      # needs special care
    }

    # Minor conditions
    # NOTE(review): 'minor allergies' / 'seasonal allergies' also match the
    # moderate 'allergies' keyword above, so such notes are penalized twice.
    minor_conditions = {
        'dental issues': -0.08,         # needs regular care
        'weight gain tendency': -0.08,  # needs diet control
        'minor allergies': -0.06,       # manageable
        'seasonal allergies': -0.06
    }

    health_score = 1.0
    condition_counts = {'severe': 0, 'moderate': 0, 'minor': 0}

    # Apply each matching penalty and count matches per severity.
    for severity, conditions in (('severe', severe_conditions),
                                 ('moderate', moderate_conditions),
                                 ('minor', minor_conditions)):
        for condition, penalty in conditions.items():
            if condition in health_notes:
                health_score += penalty
                condition_counts[severity] += 1

    # Extra penalty for multiple problems (cumulative effect)
    if condition_counts['severe'] > 1:
        health_score *= (0.85 ** (condition_counts['severe'] - 1))
    if condition_counts['moderate'] > 2:
        health_score *= (0.90 ** (condition_counts['moderate'] - 2))

    # Scale by how much the user cares about health issues
    sensitivity_multipliers = {
        'low': 1.1,     # less concerned
        'medium': 1.0,  # standard evaluation
        'high': 0.85    # very concerned
    }
    health_score *= sensitivity_multipliers.get(health_sensitivity, 1.0)

    # Lifespan adjustment, based on the lower bound of a range such as '10-12'
    lifespan = breed_record.get('average_lifespan', '10-12')
    try:
        years = float(str(lifespan).split('-')[0])
    except ValueError:
        years = None  # Unparseable lifespan: skip the adjustment
    if years is not None:
        if years < 8:
            health_score *= 0.85   # short lifespan lowers the score markedly
        elif years < 10:
            health_score *= 0.92   # shorter lifespan lowers it slightly
        elif years > 13:
            health_score *= 1.1    # long lifespan earns a modest bonus

    # Explicit health advantages
    if 'generally healthy' in health_notes or 'hardy breed' in health_notes:
        health_score *= 1.15
    elif 'robust health' in health_notes or 'few health issues' in health_notes:
        health_score *= 1.1

    # Keep the score in a reasonable range while allowing wide differences
    return max(0.1, min(1.0, health_score))
0.95 # 一般吠叫對高容忍很好 + }, + 'high': { + 'low': 0.25, # 愛叫的狗對低容忍極不適合 + 'medium': 0.65, # 愛叫的狗對一般容忍有挑戰 + 'high': 0.90 # 愛叫的狗對高容忍可以接受 + }, + 'varies': { + 'low': 0.50, # 不確定的情況對低容忍風險較大 + 'medium': 0.75, # 不確定的情況對一般容忍可嘗試 + 'high': 0.85 # 不確定的情況對高容忍問題較小 + } + } + + # 取得基礎分數 + base_score = base_scores.get(noise_level, {'low': 0.6, 'medium': 0.75, 'high': 0.85})[noise_tolerance] + + # 吠叫原因評估,根據環境調整懲罰程度 + barking_penalties = { + 'separation anxiety': { + 'apartment': -0.30, # 在公寓對鄰居影響更大 + 'house_small': -0.25, + 'house_large': -0.20 + }, + 'excessive barking': { + 'apartment': -0.25, + 'house_small': -0.20, + 'house_large': -0.15 + }, + 'territorial': { + 'apartment': -0.20, # 在公寓更容易被觸發 + 'house_small': -0.15, + 'house_large': -0.10 + }, + 'alert barking': { + 'apartment': -0.15, # 公寓環境刺激較多 + 'house_small': -0.10, + 'house_large': -0.08 + }, + 'attention seeking': { + 'apartment': -0.15, + 'house_small': -0.12, + 'house_large': -0.10 + } + } + + # 計算環境相關的吠叫懲罰 + barking_penalty = 0 + for trigger, penalties in barking_penalties.items(): + if trigger in noise_notes: + barking_penalty += penalties.get(living_space, -0.15) + + # 特殊情況評估 + special_adjustments = 0 + if has_children: + # 孩童年齡相關調整 + child_age_adjustments = { + 'toddler': { + 'high': -0.20, # 幼童對吵鬧更敏感 + 'medium': -0.15, + 'low': -0.05 + }, + 'school_age': { + 'high': -0.15, + 'medium': -0.10, + 'low': -0.05 + }, + 'teenager': { + 'high': -0.10, + 'medium': -0.05, + 'low': -0.02 + } + } + + # 根據孩童年齡和噪音等級調整 + age_adj = child_age_adjustments.get(children_age, + child_age_adjustments['school_age']) + special_adjustments += age_adj.get(noise_level, -0.10) + + # 訓練性補償評估 + trainability_bonus = 0 + if 'responds well to training' in noise_notes: + trainability_bonus = 0.12 + elif 'can be trained' in noise_notes: + trainability_bonus = 0.08 + elif 'difficult to train' in noise_notes: + trainability_bonus = 0.02 + + # 夜間吠叫特別考量 + if 'night barking' in noise_notes or 'howls' in noise_notes: + if living_space == 'apartment': + 
special_adjustments -= 0.15 + elif living_space == 'house_small': + special_adjustments -= 0.10 + else: + special_adjustments -= 0.05 + + # 計算最終分數,確保更大的分數範圍 + final_score = base_score + barking_penalty + special_adjustments + trainability_bonus + return max(0.1, min(1.0, final_score)) diff --git a/dynamic_scoring_config.py b/dynamic_scoring_config.py new file mode 100644 index 0000000000000000000000000000000000000000..d3cab14710b3a81716ef7cf86d1832b1a65008b4 --- /dev/null +++ b/dynamic_scoring_config.py @@ -0,0 +1,410 @@ +from typing import Dict, List, Any, Optional +from dataclasses import dataclass +import json +import os + +@dataclass +class DimensionConfig: + """維度配置""" + name: str + base_weight: float + priority_multiplier: Dict[str, float] + compatibility_matrix: Dict[str, Dict[str, float]] + threshold_values: Dict[str, float] + description: str + + +@dataclass +class ConstraintConfig: + """約束配置""" + name: str + condition_keywords: List[str] + elimination_threshold: float + penalty_factors: Dict[str, float] + exemption_conditions: List[str] + description: str + + +@dataclass +class ScoringProfile: + """評分配置檔""" + profile_name: str + dimensions: List[DimensionConfig] + constraints: List[ConstraintConfig] + normalization_method: str + bias_correction_rules: Dict[str, Any] + ui_preferences: Dict[str, Any] + + +class DynamicScoringConfig: + """動態評分配置管理器""" + + def __init__(self, config_path: Optional[str] = None): + self.config_path = config_path or self._get_default_config_path() + self.current_profile = self._load_default_profile() + self.custom_profiles = {} + + def _get_default_config_path(self) -> str: + """獲取默認配置路徑""" + return os.path.join(os.path.dirname(__file__), 'scoring_configs') + + def _load_default_profile(self) -> ScoringProfile: + """載入預設評分配置""" + # 空間相容性維度配置 + space_dimension = DimensionConfig( + name="space_compatibility", + base_weight=0.30, + priority_multiplier={ + "apartment_living": 1.5, + "first_time_owner": 1.2, + "limited_space": 1.4 + 
}, + compatibility_matrix={ + "apartment": { + "toy": 0.95, "small": 0.90, "medium": 0.50, + "large": 0.15, "giant": 0.05 + }, + "house_small": { + "toy": 0.85, "small": 0.90, "medium": 0.85, + "large": 0.60, "giant": 0.30 + }, + "house_medium": { + "toy": 0.80, "small": 0.85, "medium": 0.95, + "large": 0.85, "giant": 0.60 + }, + "house_large": { + "toy": 0.75, "small": 0.80, "medium": 0.90, + "large": 0.95, "giant": 0.95 + } + }, + threshold_values={ + "elimination_threshold": 0.20, + "warning_threshold": 0.40, + "good_threshold": 0.70 + }, + description="Evaluates breed size compatibility with living space" + ) + + # 運動相容性維度配置 + exercise_dimension = DimensionConfig( + name="exercise_compatibility", + base_weight=0.25, + priority_multiplier={ + "low_activity": 1.6, + "high_activity": 1.3, + "time_limited": 1.4 + }, + compatibility_matrix={ + "low_user": { + "low": 1.0, "moderate": 0.70, "high": 0.30, "very_high": 0.10 + }, + "moderate_user": { + "low": 0.80, "moderate": 1.0, "high": 0.80, "very_high": 0.50 + }, + "high_user": { + "low": 0.60, "moderate": 0.85, "high": 1.0, "very_high": 0.95 + } + }, + threshold_values={ + "severe_mismatch": 0.25, + "moderate_mismatch": 0.50, + "good_match": 0.75 + }, + description="Matches user activity level with breed exercise needs" + ) + + # 噪音相容性維度配置 + noise_dimension = DimensionConfig( + name="noise_compatibility", + base_weight=0.15, + priority_multiplier={ + "apartment_living": 1.8, + "noise_sensitive": 2.0, + "quiet_preference": 1.5 + }, + compatibility_matrix={ + "low_tolerance": { + "quiet": 1.0, "moderate": 0.60, "high": 0.20, "very_high": 0.05 + }, + "moderate_tolerance": { + "quiet": 0.90, "moderate": 1.0, "high": 0.70, "very_high": 0.40 + }, + "high_tolerance": { + "quiet": 0.80, "moderate": 0.90, "high": 1.0, "very_high": 0.85 + } + }, + threshold_values={ + "unacceptable": 0.15, + "concerning": 0.40, + "acceptable": 0.70 + }, + description="Matches breed noise levels with user tolerance" + ) + + # 約束配置 + 
apartment_constraint = ConstraintConfig( + name="apartment_size_constraint", + condition_keywords=["apartment", "small space", "studio", "condo"], + elimination_threshold=0.15, + penalty_factors={ + "large_breed": 0.70, + "giant_breed": 0.85, + "high_exercise": 0.60 + }, + exemption_conditions=["experienced_owner", "large_apartment"], + description="Eliminates breeds unsuitable for apartment living" + ) + + exercise_constraint = ConstraintConfig( + name="exercise_mismatch_constraint", + condition_keywords=["don't exercise", "low activity", "minimal exercise"], + elimination_threshold=0.20, + penalty_factors={ + "very_high_exercise": 0.80, + "working_breed": 0.60, + "high_energy": 0.70 + }, + exemption_conditions=["dog_park_access", "active_family"], + description="Prevents high-energy breeds for low-activity users" + ) + + # 偏見修正規則 + bias_correction_rules = { + "size_bias": { + "enabled": True, + "detection_threshold": 0.70, # 70%以上大型犬觸發修正 + "correction_strength": 0.60, # 修正強度 + "target_distribution": { + "toy": 0.10, "small": 0.25, "medium": 0.40, + "large": 0.20, "giant": 0.05 + } + }, + "popularity_bias": { + "enabled": True, + "common_breeds_penalty": 0.05, + "rare_breeds_bonus": 0.03 + } + } + + # UI偏好設定 + ui_preferences = { + "ranking_style": "gradient_badges", + "score_display": "percentage_with_bars", + "color_scheme": { + "excellent": "#22C55E", + "good": "#F59E0B", + "moderate": "#6B7280", + "poor": "#EF4444" + }, + "animation_enabled": True, + "detailed_breakdown": True + } + + return ScoringProfile( + profile_name="comprehensive_default", + dimensions=[space_dimension, exercise_dimension, noise_dimension], + constraints=[apartment_constraint, exercise_constraint], + normalization_method="sigmoid_compression", + bias_correction_rules=bias_correction_rules, + ui_preferences=ui_preferences + ) + + def get_dimension_config(self, dimension_name: str) -> Optional[DimensionConfig]: + """獲取維度配置""" + for dim in self.current_profile.dimensions: + if dim.name == 
def get_constraint_config(self, constraint_name: str) -> Optional[ConstraintConfig]:
    """Return the active profile's constraint named *constraint_name*, or ``None``."""
    return next(
        (
            candidate
            for candidate in self.current_profile.constraints
            if candidate.name == constraint_name
        ),
        None,
    )

def calculate_dynamic_weights(self, user_context: Dict[str, Any]) -> Dict[str, float]:
    """
    Compute normalised per-dimension weights for a user context.

    Each dimension starts from its ``base_weight`` and is multiplied by every
    ``priority_multiplier`` whose key is truthy in *user_context*. The
    adjusted weights are then divided by their total.

    Args:
        user_context: Flags describing the user; keys are matched against
            each dimension's ``priority_multiplier`` keys.

    Returns:
        Mapping of dimension name to normalised weight. Empty when the
        profile has no dimensions. When the adjusted weights sum to zero the
        dimensions are weighted equally instead of dividing by zero.
    """
    weights: Dict[str, float] = {}
    total_weight = 0

    for dimension in self.current_profile.dimensions:
        adjusted = dimension.base_weight
        for context_key, multiplier in dimension.priority_multiplier.items():
            if user_context.get(context_key, False):
                adjusted *= multiplier

        weights[dimension.name] = adjusted
        total_weight += adjusted

    if not weights:
        return {}
    if total_weight == 0:
        # Nothing to normalise against: fall back to a uniform split.
        return dict.fromkeys(weights, 1.0 / len(weights))

    return {name: value / total_weight for name, value in weights.items()}

def get_compatibility_score(self, dimension_name: str,
                            user_category: str, breed_category: str) -> float:
    """
    Look up a compatibility score in a dimension's matrix.

    Returns the matrix entry for ``[user_category][breed_category]``, or the
    neutral ``0.5`` when the dimension, the user category or the breed
    category is not configured.
    """
    dimension_config = self.get_dimension_config(dimension_name)
    if not dimension_config:
        return 0.5

    row = dimension_config.compatibility_matrix.get(user_category, {})
    return row.get(breed_category, 0.5)

def should_eliminate_breed(self, constraint_name: str,
                           breed_info: Dict[str, Any],
                           user_input: str) -> tuple[bool, str]:
    """
    Decide whether a constraint rules a breed out for this user input.

    The constraint applies only if one of its ``condition_keywords`` occurs
    in the lower-cased *user_input*; it is waived if one of its
    ``exemption_conditions`` occurs there too. Otherwise the decision is
    delegated to ``_apply_elimination_logic``.

    Returns:
        ``(eliminate, reason)``; *reason* is empty when the constraint is
        unknown or not triggered.
    """
    constraint_config = self.get_constraint_config(constraint_name)
    if not constraint_config:
        return False, ""

    lowered_input = user_input.lower()

    # Trigger check: at least one keyword must be mentioned by the user.
    if not any(keyword in lowered_input
               for keyword in constraint_config.condition_keywords):
        return False, ""

    # Exemption check: a matching exemption waives the constraint.
    if any(condition in lowered_input
           for condition in constraint_config.exemption_conditions):
        return False, "Exempted due to special conditions"

    return self._apply_elimination_logic(constraint_config, breed_info, user_input)
def _apply_elimination_logic(self, constraint_config: ConstraintConfig, + breed_info: Dict[str, Any], user_input: str) -> tuple[bool, str]: + """應用淘汰邏輯""" + # 根據約束名稱決定具體邏輯 + if constraint_config.name == "apartment_size_constraint": + breed_size = breed_info.get('Size', '').lower() + if any(size in breed_size for size in ['large', 'giant']): + return True, f"Breed size ({breed_size}) unsuitable for apartment" + + elif constraint_config.name == "exercise_mismatch_constraint": + exercise_needs = breed_info.get('Exercise Needs', '').lower() + if any(level in exercise_needs for level in ['very high', 'extreme']): + return True, f"Exercise needs ({exercise_needs}) exceed user capacity" + + return False, "" + + def get_bias_correction_settings(self) -> Dict[str, Any]: + """獲取偏見修正設定""" + return self.current_profile.bias_correction_rules + + def get_ui_preferences(self) -> Dict[str, Any]: + """獲取UI偏好設定""" + return self.current_profile.ui_preferences + + def save_custom_profile(self, profile: ScoringProfile, filename: str): + """保存自定義配置檔""" + if not os.path.exists(self.config_path): + os.makedirs(self.config_path) + + filepath = os.path.join(self.config_path, f"{filename}.json") + + # 將配置檔案轉換為JSON格式 + profile_dict = { + "profile_name": profile.profile_name, + "dimensions": [self._dimension_to_dict(dim) for dim in profile.dimensions], + "constraints": [self._constraint_to_dict(cons) for cons in profile.constraints], + "normalization_method": profile.normalization_method, + "bias_correction_rules": profile.bias_correction_rules, + "ui_preferences": profile.ui_preferences + } + + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(profile_dict, f, indent=2, ensure_ascii=False) + + def load_custom_profile(self, filename: str) -> Optional[ScoringProfile]: + """載入自定義配置檔""" + filepath = os.path.join(self.config_path, f"{filename}.json") + + if not os.path.exists(filepath): + return None + + try: + with open(filepath, 'r', encoding='utf-8') as f: + profile_dict = 
json.load(f) + + return self._dict_to_profile(profile_dict) + except Exception as e: + print(f"Error loading profile {filename}: {str(e)}") + return None + + def _dimension_to_dict(self, dimension: DimensionConfig) -> Dict[str, Any]: + """將維度配置轉換為字典""" + return { + "name": dimension.name, + "base_weight": dimension.base_weight, + "priority_multiplier": dimension.priority_multiplier, + "compatibility_matrix": dimension.compatibility_matrix, + "threshold_values": dimension.threshold_values, + "description": dimension.description + } + + def _constraint_to_dict(self, constraint: ConstraintConfig) -> Dict[str, Any]: + """將約束配置轉換為字典""" + return { + "name": constraint.name, + "condition_keywords": constraint.condition_keywords, + "elimination_threshold": constraint.elimination_threshold, + "penalty_factors": constraint.penalty_factors, + "exemption_conditions": constraint.exemption_conditions, + "description": constraint.description + } + + def _dict_to_profile(self, profile_dict: Dict[str, Any]) -> ScoringProfile: + """將字典轉換為評分配置檔""" + dimensions = [self._dict_to_dimension(dim) for dim in profile_dict["dimensions"]] + constraints = [self._dict_to_constraint(cons) for cons in profile_dict["constraints"]] + + return ScoringProfile( + profile_name=profile_dict["profile_name"], + dimensions=dimensions, + constraints=constraints, + normalization_method=profile_dict["normalization_method"], + bias_correction_rules=profile_dict["bias_correction_rules"], + ui_preferences=profile_dict["ui_preferences"] + ) + + def _dict_to_dimension(self, dim_dict: Dict[str, Any]) -> DimensionConfig: + """將字典轉換為維度配置""" + return DimensionConfig( + name=dim_dict["name"], + base_weight=dim_dict["base_weight"], + priority_multiplier=dim_dict["priority_multiplier"], + compatibility_matrix=dim_dict["compatibility_matrix"], + threshold_values=dim_dict["threshold_values"], + description=dim_dict["description"] + ) + + def _dict_to_constraint(self, cons_dict: Dict[str, Any]) -> ConstraintConfig: + 
"""將字典轉換為約束配置""" + return ConstraintConfig( + name=cons_dict["name"], + condition_keywords=cons_dict["condition_keywords"], + elimination_threshold=cons_dict["elimination_threshold"], + penalty_factors=cons_dict["penalty_factors"], + exemption_conditions=cons_dict["exemption_conditions"], + description=cons_dict["description"] + ) + +def get_scoring_config() -> DynamicScoringConfig: + """獲取全局評分配置""" + return scoring_config + + +def update_scoring_config(new_config: DynamicScoringConfig): + """更新全局評分配置""" + global scoring_config + scoring_config = new_config diff --git a/multi_head_scorer.py b/multi_head_scorer.py new file mode 100644 index 0000000000000000000000000000000000000000..96430219e5d8ac6e4e6da8499b32b7c5e3663bf1 --- /dev/null +++ b/multi_head_scorer.py @@ -0,0 +1,763 @@ +import numpy as np +import json +from typing import Dict, List, Tuple, Optional, Any, Set +from dataclasses import dataclass, field +from abc import ABC, abstractmethod +import traceback +from sentence_transformers import SentenceTransformer +from sklearn.metrics.pairwise import cosine_similarity +from dog_database import get_dog_description +from breed_health_info import breed_health_info +from breed_noise_info import breed_noise_info +from query_understanding import QueryDimensions +from constraint_manager import FilterResult + +@dataclass +class DimensionalScores: + """多維度評分結果""" + semantic_scores: Dict[str, float] = field(default_factory=dict) + attribute_scores: Dict[str, float] = field(default_factory=dict) + fused_scores: Dict[str, float] = field(default_factory=dict) + bidirectional_scores: Dict[str, float] = field(default_factory=dict) + confidence_weights: Dict[str, float] = field(default_factory=dict) + +@dataclass +class BreedScore: + """品種總體評分結果""" + breed_name: str + final_score: float + dimensional_breakdown: Dict[str, float] = field(default_factory=dict) + semantic_component: float = 0.0 + attribute_component: float = 0.0 + bidirectional_bonus: float = 0.0 + 
confidence_score: float = 1.0 + explanation: Dict[str, Any] = field(default_factory=dict) + +class ScoringHead(ABC): + """抽象評分頭基類""" + + @abstractmethod + def score_dimension(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions, + dimension_type: str) -> float: + """為特定維度評分""" + pass + +class SemanticScoringHead(ScoringHead): + """語義評分頭""" + + def __init__(self, sbert_model: Optional[SentenceTransformer] = None): + self.sbert_model = sbert_model + self.dimension_embeddings = {} + if self.sbert_model: + self._build_dimension_embeddings() + + def _build_dimension_embeddings(self): + """建立維度模板嵌入""" + dimension_templates = { + 'spatial_apartment': "small apartment living, limited space, no yard, urban environment", + 'spatial_house': "house with yard, outdoor space, suburban living, large property", + 'activity_low': "low energy, minimal exercise needs, calm lifestyle, indoor activities", + 'activity_moderate': "moderate exercise, daily walks, balanced activity level", + 'activity_high': "high energy, vigorous exercise, outdoor sports, active lifestyle", + 'noise_low': "quiet, rarely barks, peaceful, suitable for noise-sensitive environments", + 'noise_moderate': "moderate barking, occasional vocalizations, average noise level", + 'noise_high': "vocal, frequent barking, alert dog, comfortable with noise", + 'size_small': "small compact breed, easy to handle, portable size", + 'size_medium': "medium sized dog, balanced proportions, moderate size", + 'size_large': "large impressive dog, substantial presence, bigger breed", + 'family_children': "child-friendly, gentle with kids, family-oriented, safe around children", + 'family_elderly': "calm companion, gentle nature, suitable for seniors, low maintenance", + 'maintenance_low': "low grooming needs, minimal care requirements, easy maintenance", + 'maintenance_moderate': "regular grooming, moderate care needs, standard maintenance", + 'maintenance_high': "high grooming requirements, professional care, intensive 
maintenance" + } + + for key, template in dimension_templates.items(): + if self.sbert_model: + embedding = self.sbert_model.encode(template, convert_to_tensor=False) + self.dimension_embeddings[key] = embedding + + def score_dimension(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions, + dimension_type: str) -> float: + """語義維度評分""" + if not self.sbert_model or dimension_type not in self.dimension_embeddings: + return 0.5 # 預設中性分數 + + try: + # 建立品種描述 + breed_description = self._create_breed_description(breed_info, dimension_type) + + # 生成嵌入 + breed_embedding = self.sbert_model.encode(breed_description, convert_to_tensor=False) + dimension_embedding = self.dimension_embeddings[dimension_type] + + # 計算相似度 + similarity = cosine_similarity([breed_embedding], [dimension_embedding])[0][0] + + # 正規化到 0-1 範圍 + normalized_score = (similarity + 1) / 2 # 從 [-1,1] 轉換到 [0,1] + + return max(0.0, min(1.0, normalized_score)) + + except Exception as e: + print(f"Error in semantic scoring for {dimension_type}: {str(e)}") + return 0.5 + + def _create_breed_description(self, breed_info: Dict[str, Any], + dimension_type: str) -> str: + """為特定維度創建品種描述""" + breed_name = breed_info.get('display_name', breed_info.get('breed_name', '')) + + if dimension_type.startswith('spatial_'): + size = breed_info.get('size', 'medium') + exercise = breed_info.get('exercise_needs', 'moderate') + return f"{breed_name} is a {size} dog with {exercise} exercise needs" + + elif dimension_type.startswith('activity_'): + exercise = breed_info.get('exercise_needs', 'moderate') + temperament = breed_info.get('temperament', '') + return f"{breed_name} has {exercise} exercise requirements and {temperament} temperament" + + elif dimension_type.startswith('noise_'): + noise_level = breed_info.get('noise_level', 'moderate') + temperament = breed_info.get('temperament', '') + return f"{breed_name} has {noise_level} noise level and {temperament} nature" + + elif dimension_type.startswith('size_'): + size = 
breed_info.get('size', 'medium') + return f"{breed_name} is a {size} sized dog breed" + + elif dimension_type.startswith('family_'): + children = breed_info.get('good_with_children', 'Yes') + temperament = breed_info.get('temperament', '') + return f"{breed_name} is {children} with children and has {temperament} temperament" + + elif dimension_type.startswith('maintenance_'): + grooming = breed_info.get('grooming_needs', 'moderate') + care_level = breed_info.get('care_level', 'moderate') + return f"{breed_name} requires {grooming} grooming and {care_level} care level" + + return f"{breed_name} is a dog breed with various characteristics" + +class AttributeScoringHead(ScoringHead): + """屬性評分頭""" + + def __init__(self): + self.scoring_matrices = self._initialize_scoring_matrices() + + def _initialize_scoring_matrices(self) -> Dict[str, Dict[str, float]]: + """初始化評分矩陣""" + return { + 'spatial_scoring': { + # (user_preference, breed_attribute) -> score + ('apartment', 'small'): 1.0, + ('apartment', 'medium'): 0.6, + ('apartment', 'large'): 0.2, + ('apartment', 'giant'): 0.0, + ('house', 'small'): 0.7, + ('house', 'medium'): 0.9, + ('house', 'large'): 1.0, + ('house', 'giant'): 1.0, + }, + 'activity_scoring': { + ('low', 'low'): 1.0, + ('low', 'moderate'): 0.7, + ('low', 'high'): 0.2, + ('low', 'very high'): 0.0, + ('moderate', 'low'): 0.8, + ('moderate', 'moderate'): 1.0, + ('moderate', 'high'): 0.8, + ('high', 'moderate'): 0.7, + ('high', 'high'): 1.0, + ('high', 'very high'): 1.0, + }, + 'noise_scoring': { + ('low', 'low'): 1.0, + ('low', 'moderate'): 0.6, + ('low', 'high'): 0.1, + ('moderate', 'low'): 0.8, + ('moderate', 'moderate'): 1.0, + ('moderate', 'high'): 0.7, + ('high', 'low'): 0.7, + ('high', 'moderate'): 0.9, + ('high', 'high'): 1.0, + }, + 'size_scoring': { + ('small', 'small'): 1.0, + ('small', 'medium'): 0.5, + ('small', 'large'): 0.2, + ('medium', 'small'): 0.6, + ('medium', 'medium'): 1.0, + ('medium', 'large'): 0.6, + ('large', 'medium'): 0.7, + 
('large', 'large'): 1.0, + ('large', 'giant'): 0.9, + }, + 'maintenance_scoring': { + ('low', 'low'): 1.0, + ('low', 'moderate'): 0.6, + ('low', 'high'): 0.2, + ('moderate', 'low'): 0.8, + ('moderate', 'moderate'): 1.0, + ('moderate', 'high'): 0.7, + ('high', 'low'): 0.6, + ('high', 'moderate'): 0.8, + ('high', 'high'): 1.0, + } + } + + def score_dimension(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions, + dimension_type: str) -> float: + """屬性維度評分""" + try: + if dimension_type.startswith('spatial_'): + return self._score_spatial_compatibility(breed_info, dimensions) + elif dimension_type.startswith('activity_'): + return self._score_activity_compatibility(breed_info, dimensions) + elif dimension_type.startswith('noise_'): + return self._score_noise_compatibility(breed_info, dimensions) + elif dimension_type.startswith('size_'): + return self._score_size_compatibility(breed_info, dimensions) + elif dimension_type.startswith('family_'): + return self._score_family_compatibility(breed_info, dimensions) + elif dimension_type.startswith('maintenance_'): + return self._score_maintenance_compatibility(breed_info, dimensions) + else: + return 0.5 # 預設中性分數 + + except Exception as e: + print(f"Error in attribute scoring for {dimension_type}: {str(e)}") + return 0.5 + + def _score_spatial_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """空間相容性評分""" + if not dimensions.spatial_constraints: + return 0.5 + + breed_size = breed_info.get('size', 'medium').lower() + total_score = 0.0 + + for spatial_constraint in dimensions.spatial_constraints: + key = (spatial_constraint, breed_size) + score = self.scoring_matrices['spatial_scoring'].get(key, 0.5) + total_score += score + + return total_score / len(dimensions.spatial_constraints) + + def _score_activity_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """活動相容性評分""" + if not dimensions.activity_level: + return 0.5 + + 
breed_exercise = breed_info.get('exercise_needs', 'moderate').lower() + # 清理品種運動需求字串 + if 'very high' in breed_exercise: + breed_exercise = 'very high' + elif 'high' in breed_exercise: + breed_exercise = 'high' + elif 'low' in breed_exercise: + breed_exercise = 'low' + else: + breed_exercise = 'moderate' + + total_score = 0.0 + for activity_level in dimensions.activity_level: + key = (activity_level, breed_exercise) + score = self.scoring_matrices['activity_scoring'].get(key, 0.5) + total_score += score + + return total_score / len(dimensions.activity_level) + + def _score_noise_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """噪音相容性評分""" + if not dimensions.noise_preferences: + return 0.5 + + breed_noise = breed_info.get('noise_level', 'moderate').lower() + total_score = 0.0 + + for noise_pref in dimensions.noise_preferences: + key = (noise_pref, breed_noise) + score = self.scoring_matrices['noise_scoring'].get(key, 0.5) + total_score += score + + return total_score / len(dimensions.noise_preferences) + + def _score_size_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """尺寸相容性評分""" + if not dimensions.size_preferences: + return 0.5 + + breed_size = breed_info.get('size', 'medium').lower() + total_score = 0.0 + + for size_pref in dimensions.size_preferences: + key = (size_pref, breed_size) + score = self.scoring_matrices['size_scoring'].get(key, 0.5) + total_score += score + + return total_score / len(dimensions.size_preferences) + + def _score_family_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """家庭相容性評分""" + if not dimensions.family_context: + return 0.5 + + good_with_children = breed_info.get('good_with_children', 'Yes') + temperament = breed_info.get('temperament', '').lower() + + total_score = 0.0 + score_count = 0 + + for family_context in dimensions.family_context: + if family_context == 'children': + if good_with_children == 
'Yes': + total_score += 1.0 + elif good_with_children == 'No': + total_score += 0.1 + else: + total_score += 0.6 + score_count += 1 + elif family_context == 'elderly': + # 溫和、冷靜的品種適合老年人 + if any(trait in temperament for trait in ['gentle', 'calm', 'docile']): + total_score += 1.0 + elif any(trait in temperament for trait in ['energetic', 'hyperactive']): + total_score += 0.3 + else: + total_score += 0.7 + score_count += 1 + elif family_context == 'single': + # 大多數品種都適合單身人士 + total_score += 0.8 + score_count += 1 + + return total_score / max(1, score_count) + + def _score_maintenance_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """維護相容性評分""" + if not dimensions.maintenance_level: + return 0.5 + + breed_grooming = breed_info.get('grooming_needs', 'moderate').lower() + total_score = 0.0 + + for maintenance_level in dimensions.maintenance_level: + key = (maintenance_level, breed_grooming) + score = self.scoring_matrices['maintenance_scoring'].get(key, 0.5) + total_score += score + + return total_score / len(dimensions.maintenance_level) + +class MultiHeadScorer: + """ + 多頭評分系統 + 結合語義和屬性評分,提供雙向相容性評估 + """ + + def __init__(self, sbert_model: Optional[SentenceTransformer] = None): + self.sbert_model = sbert_model + self.semantic_head = SemanticScoringHead(sbert_model) + self.attribute_head = AttributeScoringHead() + self.dimension_weights = self._initialize_dimension_weights() + self.head_fusion_weights = self._initialize_head_fusion_weights() + + def _initialize_dimension_weights(self) -> Dict[str, float]: + """初始化維度權重""" + return { + 'activity_compatibility': 0.35, # 最高優先級:生活方式匹配 + 'noise_compatibility': 0.25, # 關鍵:居住和諧 + 'spatial_compatibility': 0.15, # 基本:物理約束 + 'family_compatibility': 0.10, # 重要:社交相容性 + 'maintenance_compatibility': 0.10, # 實際:持續護理評估 + 'size_compatibility': 0.05 # 基本:偏好匹配 + } + + def _initialize_head_fusion_weights(self) -> Dict[str, Dict[str, float]]: + """初始化頭融合權重""" + return { + 
'activity_compatibility': {'semantic': 0.4, 'attribute': 0.6}, + 'noise_compatibility': {'semantic': 0.3, 'attribute': 0.7}, + 'spatial_compatibility': {'semantic': 0.3, 'attribute': 0.7}, + 'family_compatibility': {'semantic': 0.5, 'attribute': 0.5}, + 'maintenance_compatibility': {'semantic': 0.4, 'attribute': 0.6}, + 'size_compatibility': {'semantic': 0.2, 'attribute': 0.8} + } + + def score_breeds(self, candidate_breeds: Set[str], + dimensions: QueryDimensions) -> List[BreedScore]: + """ + 為候選品種評分 + + Args: + candidate_breeds: 通過約束篩選的候選品種 + dimensions: 查詢維度 + + Returns: + List[BreedScore]: 品種評分結果列表 + """ + try: + breed_scores = [] + + # 為每個品種計算分數 + for breed in candidate_breeds: + breed_info = self._get_breed_info(breed) + score_result = self._score_single_breed(breed_info, dimensions) + breed_scores.append(score_result) + + # 按最終分數排序 + breed_scores.sort(key=lambda x: x.final_score, reverse=True) + + return breed_scores + + except Exception as e: + print(f"Error scoring breeds: {str(e)}") + print(traceback.format_exc()) + return [] + + def _get_breed_info(self, breed: str) -> Dict[str, Any]: + """獲取品種資訊""" + try: + # 基本品種資訊 + breed_info = get_dog_description(breed) or {} + + # 健康資訊 + health_info = breed_health_info.get(breed, {}) + + # 噪音資訊 + noise_info = breed_noise_info.get(breed, {}) + + # 整合資訊 + return { + 'breed_name': breed, + 'display_name': breed.replace('_', ' '), + 'size': breed_info.get('Size', '').lower(), + 'exercise_needs': breed_info.get('Exercise Needs', '').lower(), + 'grooming_needs': breed_info.get('Grooming Needs', '').lower(), + 'temperament': breed_info.get('Temperament', '').lower(), + 'good_with_children': breed_info.get('Good with Children', 'Yes'), + 'care_level': breed_info.get('Care Level', '').lower(), + 'lifespan': breed_info.get('Lifespan', '10-12 years'), + 'noise_level': noise_info.get('noise_level', 'moderate').lower(), + 'description': breed_info.get('Description', ''), + 'raw_breed_info': breed_info, + 'raw_health_info': 
health_info, + 'raw_noise_info': noise_info + } + except Exception as e: + print(f"Error getting breed info for {breed}: {str(e)}") + return { + 'breed_name': breed, + 'display_name': breed.replace('_', ' ') + } + + def _score_single_breed(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> BreedScore: + """為單一品種評分""" + try: + dimensional_scores = {} + semantic_total = 0.0 + attribute_total = 0.0 + + # 動態權重分配(基於用戶表達的維度) + active_dimensions = self._get_active_dimensions(dimensions) + adjusted_weights = self._adjust_dimension_weights(active_dimensions) + + # 為每個活躍維度評分 + for dimension, weight in adjusted_weights.items(): + # 語義評分 + semantic_score = self.semantic_head.score_dimension( + breed_info, dimensions, dimension + ) + + # 屬性評分 + attribute_score = self.attribute_head.score_dimension( + breed_info, dimensions, dimension + ) + + # 頭融合 + fusion_weights = self.head_fusion_weights.get( + dimension, {'semantic': 0.5, 'attribute': 0.5} + ) + + fused_score = (semantic_score * fusion_weights['semantic'] + + attribute_score * fusion_weights['attribute']) + + dimensional_scores[dimension] = fused_score + semantic_total += semantic_score * weight + attribute_total += attribute_score * weight + + # 雙向相容性評估 + bidirectional_bonus = self._calculate_bidirectional_bonus( + breed_info, dimensions + ) + + # Apply size bias correction + bias_correction = self._calculate_size_bias_correction(breed_info, dimensions) + + # 計算最終分數 + base_score = sum(score * adjusted_weights[dim] + for dim, score in dimensional_scores.items()) + + # Apply corrections + final_score = max(0.0, min(1.0, base_score + bidirectional_bonus + bias_correction)) + + # 信心度評估 + confidence_score = self._calculate_confidence(dimensions) + + return BreedScore( + breed_name=breed_info.get('display_name', breed_info['breed_name']), + final_score=final_score, + dimensional_breakdown=dimensional_scores, + semantic_component=semantic_total, + attribute_component=attribute_total, + 
bidirectional_bonus=bidirectional_bonus, + confidence_score=confidence_score, + explanation=self._generate_explanation(breed_info, dimensions, dimensional_scores) + ) + + except Exception as e: + print(f"Error scoring breed {breed_info.get('breed_name', 'unknown')}: {str(e)}") + return BreedScore( + breed_name=breed_info.get('display_name', breed_info.get('breed_name', 'Unknown')), + final_score=0.5, + confidence_score=0.0 + ) + + def _get_active_dimensions(self, dimensions: QueryDimensions) -> Set[str]: + """獲取活躍的維度""" + active = set() + + if dimensions.spatial_constraints: + active.add('spatial_compatibility') + if dimensions.activity_level: + active.add('activity_compatibility') + if dimensions.noise_preferences: + active.add('noise_compatibility') + if dimensions.size_preferences: + active.add('size_compatibility') + if dimensions.family_context: + active.add('family_compatibility') + if dimensions.maintenance_level: + active.add('maintenance_compatibility') + + return active + + def _adjust_dimension_weights(self, active_dimensions: Set[str]) -> Dict[str, float]: + """調整維度權重""" + if not active_dimensions: + return self.dimension_weights + + # 只為活躍維度分配權重 + active_weights = {dim: weight for dim, weight in self.dimension_weights.items() + if dim in active_dimensions} + + # 正規化權重總和為 1.0 + total_weight = sum(active_weights.values()) + if total_weight > 0: + active_weights = {dim: weight / total_weight + for dim, weight in active_weights.items()} + + return active_weights + + def _calculate_bidirectional_bonus(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """計算雙向相容性獎勵""" + try: + bonus = 0.0 + + # 正向相容性:品種滿足用戶需求 + forward_compatibility = self._assess_forward_compatibility(breed_info, dimensions) + + # 反向相容性:用戶生活方式適合品種需求 + reverse_compatibility = self._assess_reverse_compatibility(breed_info, dimensions) + + # 雙向獎勵(較為保守) + bonus = min(0.1, (forward_compatibility + reverse_compatibility) * 0.05) + + return bonus + + except Exception as 
e: + print(f"Error calculating bidirectional bonus: {str(e)}") + return 0.0 + + def _assess_forward_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """評估正向相容性""" + compatibility = 0.0 + + # 空間需求匹配 + if 'apartment' in dimensions.spatial_constraints: + size = breed_info.get('size', '') + if 'small' in size: + compatibility += 0.3 + elif 'medium' in size: + compatibility += 0.1 + + # 活動需求匹配 + if 'low' in dimensions.activity_level: + exercise = breed_info.get('exercise_needs', '') + if 'low' in exercise: + compatibility += 0.3 + elif 'moderate' in exercise: + compatibility += 0.1 + + return compatibility + + def _assess_reverse_compatibility(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions) -> float: + """評估反向相容性""" + compatibility = 0.0 + + # 品種是否能在用戶環境中茁壯成長 + exercise_needs = breed_info.get('exercise_needs', '') + + if 'high' in exercise_needs: + # 高運動需求品種需要確認用戶能提供足夠運動 + if ('high' in dimensions.activity_level or + 'house' in dimensions.spatial_constraints): + compatibility += 0.2 + else: + compatibility -= 0.2 + + # 品種護理需求是否與用戶能力匹配 + grooming_needs = breed_info.get('grooming_needs', '') + if 'high' in grooming_needs: + if 'high' in dimensions.maintenance_level: + compatibility += 0.1 + elif 'low' in dimensions.maintenance_level: + compatibility -= 0.1 + + return compatibility + + def _calculate_size_bias_correction(self, breed_info: Dict, + dimensions: QueryDimensions) -> float: + """Correct systematic bias toward larger breeds""" + breed_size = breed_info.get('size', '').lower() + + # Default no bias correction + correction = 0.0 + + # Detect if user specified moderate/balanced preferences + if any(term in dimensions.activity_level for term in ['moderate', 'balanced', 'average']): + # Penalize extremes + if breed_size in ['giant', 'toy']: + correction = -0.1 + elif breed_size in ['large']: + correction = -0.05 + + # Boost medium breeds for moderate requirements + if 'medium' in breed_size and 'balanced' in 
str(dimensions.activity_level): + correction = 0.1 + + return correction + + def _calculate_confidence(self, dimensions: QueryDimensions) -> float: + """計算推薦信心度""" + # 基於維度覆蓋率和信心分數計算 + dimension_count = sum([ + len(dimensions.spatial_constraints), + len(dimensions.activity_level), + len(dimensions.noise_preferences), + len(dimensions.size_preferences), + len(dimensions.family_context), + len(dimensions.maintenance_level), + len(dimensions.special_requirements) + ]) + + # 基礎信心度 + base_confidence = min(1.0, dimension_count * 0.15) + + # 品種提及獎勵 + breed_bonus = min(0.2, len(dimensions.breed_mentions) * 0.1) + + # 整體信心分數 + overall_confidence = dimensions.confidence_scores.get('overall', 0.5) + + return min(1.0, base_confidence + breed_bonus + overall_confidence * 0.3) + + def _generate_explanation(self, breed_info: Dict[str, Any], + dimensions: QueryDimensions, + dimensional_scores: Dict[str, float]) -> Dict[str, Any]: + """生成評分解釋""" + try: + explanation = { + 'strengths': [], + 'considerations': [], + 'match_highlights': [], + 'score_breakdown': dimensional_scores + } + + breed_name = breed_info.get('display_name', '') + + # 分析各維度表現 + for dimension, score in dimensional_scores.items(): + if score >= 0.8: + explanation['strengths'].append(self._get_strength_text(dimension, breed_info)) + elif score <= 0.3: + explanation['considerations'].append(self._get_consideration_text(dimension, breed_info)) + else: + explanation['match_highlights'].append(f"{dimension}: {score:.2f}") + + return explanation + + except Exception as e: + print(f"Error generating explanation: {str(e)}") + return {'strengths': [], 'considerations': [], 'match_highlights': []} + + def _get_strength_text(self, dimension: str, breed_info: Dict[str, Any]) -> str: + """Get strength description""" + breed_name = breed_info.get('display_name', '') + + if dimension == 'activity_compatibility': + return f"{breed_name} has an activity level that matches your lifestyle very well" + elif dimension == 
'noise_compatibility': + return f"{breed_name} has noise characteristics that fit your environment" + elif dimension == 'spatial_compatibility': + return f"{breed_name} is very suitable for your living space" + elif dimension == 'family_compatibility': + return f"{breed_name} performs well in a family environment" + elif dimension == 'maintenance_compatibility': + return f"{breed_name} has grooming needs that match your willingness to commit" + else: + return f"{breed_name} shows strong performance in {dimension}" + + def _get_consideration_text(self, dimension: str, breed_info: Dict[str, Any]) -> str: + """Get consideration description""" + breed_name = breed_info.get('display_name', '') + + if dimension == 'activity_compatibility': + return f"{breed_name} may have exercise needs that differ from your lifestyle" + elif dimension == 'noise_compatibility': + return f"{breed_name} has noise characteristics that require special consideration" + elif dimension == 'maintenance_compatibility': + return f"{breed_name} has relatively high grooming requirements" + else: + return f"{breed_name} requires extra consideration in {dimension}" + + +def score_breed_candidates(candidate_breeds: Set[str], + dimensions: QueryDimensions, + sbert_model: Optional[SentenceTransformer] = None) -> List[BreedScore]: + """ + 便利函數:為候選品種評分 + + Args: + candidate_breeds: 候選品種集合 + dimensions: 查詢維度 + sbert_model: 可選的SBERT模型 + + Returns: + List[BreedScore]: 評分結果列表 + """ + scorer = MultiHeadScorer(sbert_model) + return scorer.score_breeds(candidate_breeds, dimensions) diff --git a/natural_language_processor.py b/natural_language_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..e7d6005f7f9cc9d3a446dac335098b6e8b4bcf3c --- /dev/null +++ b/natural_language_processor.py @@ -0,0 +1,488 @@ +import re +import string +from typing import Dict, List, Tuple, Optional, Any +import traceback + +class NaturalLanguageProcessor: + """ + Natural language processing utility class + 
Handles text preprocessing and keyword extraction for user input + """ + + def __init__(self): + """Initialize the natural language processor""" + self.stop_words = { + 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', + 'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', + 'to', 'was', 'will', 'with', 'would', 'i', 'me', 'my', 'we', 'us', + 'our', 'you', 'your', 'they', 'them', 'their' + } + + # Breed name mappings (common aliases to standard names) + self.breed_aliases = { + 'lab': 'labrador_retriever', + 'labrador': 'labrador_retriever', + 'golden': 'golden_retriever', + 'retriever': ['labrador_retriever', 'golden_retriever'], + 'german shepherd': 'german_shepherd', + 'shepherd': 'german_shepherd', + 'border collie': 'border_collie', + 'collie': ['border_collie', 'collie'], + 'bulldog': ['french_bulldog', 'english_bulldog'], + 'french bulldog': 'french_bulldog', + 'poodle': ['standard_poodle', 'miniature_poodle', 'toy_poodle'], + 'husky': 'siberian_husky', + 'siberian husky': 'siberian_husky', + 'beagle': 'beagle', + 'yorkshire terrier': 'yorkshire_terrier', + 'yorkie': 'yorkshire_terrier', + 'chihuahua': 'chihuahua', + 'dachshund': 'dachshund', + 'wiener dog': 'dachshund', + 'rottweiler': 'rottweiler', + 'rottie': 'rottweiler', + 'boxer': 'boxer', + 'great dane': 'great_dane', + 'dane': 'great_dane', + 'mastiff': ['bull_mastiff', 'tibetan_mastiff'], + 'pitbull': 'american_staffordshire_terrier', + 'pit bull': 'american_staffordshire_terrier', + 'shih tzu': 'shih-tzu', + 'maltese': 'maltese_dog', + 'pug': 'pug', + 'basset hound': 'basset', + 'bloodhound': 'bloodhound', + 'australian shepherd': 'kelpie', + 'aussie': 'kelpie' + } + + # Lifestyle keyword mappings + self.lifestyle_keywords = { + 'living_space': { + 'apartment': ['apartment', 'flat', 'condo', 'small space', 'city living', 'urban'], + 'house': ['house', 'home', 'yard', 'garden', 'suburban', 'large space'], + 'farm': ['farm', 'rural', 'country', 'acreage', 'ranch'] + }, + 
'activity_level': { + 'very_high': ['very active', 'extremely energetic', 'marathon runner', 'athlete'], + 'high': ['active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor activities', + 'sports', 'jogging', 'biking', 'adventure'], + 'moderate': ['moderate exercise', 'some activity', 'weekend walks', 'occasional exercise'], + 'low': ['calm', 'lazy', 'indoor', 'low energy', 'couch potato', 'sedentary', 'quiet lifestyle'] + }, + 'family_situation': { + 'children': ['children', 'kids', 'toddlers', 'babies', 'family with children', 'young family'], + 'elderly': ['elderly', 'senior', 'old', 'retired', 'senior citizen'], + 'single': ['single', 'alone', 'individual', 'bachelor', 'solo'], + 'couple': ['couple', 'two people', 'pair', 'duo'] + }, + 'noise_tolerance': { + 'low': ['quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise'], + 'moderate': ['some noise ok', 'moderate barking', 'normal noise'], + 'high': ['loud ok', 'barking fine', 'noise tolerant', 'doesn\'t mind noise'] + }, + 'size_preference': { + 'small': ['small', 'tiny', 'little', 'compact', 'lap dog', 'petite', 'miniature'], + 'medium': ['medium', 'moderate size', 'average', 'mid-size'], + 'large': ['large', 'big', 'huge', 'giant', 'massive', 'substantial'], + 'varies': ['any size', 'size doesn\'t matter', 'flexible on size'] + }, + 'experience_level': { + 'beginner': ['first time', 'beginner', 'new to dogs', 'inexperienced', 'never had'], + 'some': ['some experience', 'had dogs before', 'moderate experience'], + 'experienced': ['experienced', 'expert', 'very experienced', 'professional', 'trainer'] + }, + 'grooming_commitment': { + 'low': ['low maintenance', 'easy care', 'minimal grooming', 'wash and go'], + 'moderate': ['moderate grooming', 'some brushing', 'regular care'], + 'high': ['high maintenance', 'lots of grooming', 'professional grooming', 'daily brushing'] + }, + 'special_needs': { + 'guard': ['guard dog', 'protection', 'security', 'watchdog', 'guardian'], + 
def preprocess_text(self, text: str) -> str:
    """
    Normalize raw user text so keyword and phrase lookups can match it.

    The text is lower-cased, typographic apostrophes are folded to the
    ASCII apostrophe, every character that is not a word character,
    whitespace, hyphen or apostrophe is replaced by a space, and runs of
    whitespace are collapsed.

    Args:
        text: Raw text (``None`` and ``""`` are accepted).

    Returns:
        The normalized text, without leading or trailing whitespace;
        ``""`` for empty input.
    """
    if not text:
        return ""

    text = text.lower()

    # Keyword tables contain contractions such as "don't mind"; curly quotes
    # would otherwise be turned into spaces and those keywords could never match.
    text = text.replace('\u2019', "'").replace('\u2018', "'")

    # Drop punctuation but keep hyphens and apostrophes, which carry meaning
    # ("low-maintenance", "doesn't").
    text = re.sub(r'[^\w\s\-\']', ' ', text)

    # Collapse whitespace, then strip last: removing trailing punctuation
    # ("dogs!") leaves a space behind that an early strip would miss.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
def _find_preference_score(self, text: str, breed_mention: str) -> float:
    """
    Score how strongly the user likes or dislikes a mentioned breed.

    The 50 characters on either side of the first occurrence of
    ``breed_mention`` are scanned for the phrases in
    ``self.preference_indicators``. Phrases are matched on word boundaries
    and longest-first, so "dislike" is never read as "like" and
    "not interested" wins over "interested in". The indicator closest to
    the mention decides the score, which lets negative indicators
    ("hate", "dislike") produce scores below the neutral 0.5.

    Args:
        text: Preprocessed (lower-cased) text.
        breed_mention: The alias that was found in ``text``.

    Returns:
        Preference score in the range 0.0-1.0; 0.5 when the mention or any
        indicator is absent, or when an error occurs.
    """
    neutral_score = 0.5

    try:
        mention_pos = text.find(breed_mention)
        if mention_pos == -1:
            return neutral_score

        # Context window: 50 characters before and after the mention.
        context_start = max(0, mention_pos - 50)
        context_end = min(len(text), mention_pos + len(breed_mention) + 50)
        context = text[context_start:context_end]

        # Mention boundaries expressed relative to the context window.
        rel_start = mention_pos - context_start
        rel_end = rel_start + len(breed_mention)

        claimed_spans = []  # spans already consumed by a longer indicator
        best_distance = None
        best_score = neutral_score

        for indicator in sorted(self.preference_indicators, key=len, reverse=True):
            pattern = r'(?<!\w)' + re.escape(indicator) + r'(?!\w)'
            for match in re.finditer(pattern, context):
                start, end = match.span()
                if any(start < c_end and c_start < end
                       for c_start, c_end in claimed_spans):
                    continue
                claimed_spans.append((start, end))

                if end <= rel_start:
                    distance = rel_start - end
                elif start >= rel_end:
                    distance = start - rel_end
                else:
                    distance = 0

                if best_distance is None or distance < best_distance:
                    best_distance = distance
                    best_score = self.preference_indicators[indicator]

        # NOTE: self.order_keywords is intentionally not applied. Its
        # multipliers are all <= 1.0 and were only ever combined through
        # max(), so they could never change the score.
        return best_score

    except Exception as e:
        print(f"Error finding preference score: {str(e)}")
        return neutral_score
def analyze_sentiment(self, text: str) -> Dict[str, float]:
    """
    Estimate the sentiment of the text with a small lexicon.

    Each lexicon term counts at most once, and the counts are divided by
    the number of words in the preprocessed text. Terms are matched on word
    boundaries, and negative phrases are removed from the text before
    positive terms are looked up, so "dislike" is not counted as "like"
    and "don't want" / "not good" are not counted as "want" / "good".

    Args:
        text: Input text.

    Returns:
        ``{'positive': r, 'negative': r, 'neutral': r}`` with every ratio
        in 0.0-1.0; an empty text is fully neutral.
    """
    text = self.preprocess_text(text)

    positive_words = [
        'love', 'like', 'want', 'prefer', 'good', 'great', 'excellent',
        'perfect', 'ideal', 'wonderful', 'amazing', 'fantastic'
    ]

    negative_words = [
        'hate', 'dislike', 'bad', 'terrible', 'awful', 'horrible',
        'not good', 'don\'t want', 'avoid', 'against', 'problem'
    ]

    total_words = len(text.split())
    if total_words == 0:
        return {'positive': 0.0, 'negative': 0.0, 'neutral': 1.0}

    def term_pattern(term: str) -> str:
        # Whole-term match: no word character directly before or after.
        return r'(?<!\w)' + re.escape(term) + r'(?!\w)'

    # Negative terms first, longest first; blank out what they matched so
    # the positive word embedded in a negative phrase is not counted again.
    remaining = text
    negative_count = 0
    for term in sorted(negative_words, key=len, reverse=True):
        pattern = term_pattern(term)
        if re.search(pattern, remaining):
            negative_count += 1
            remaining = re.sub(pattern, ' ', remaining)

    positive_count = sum(
        1 for term in positive_words if re.search(term_pattern(term), remaining)
    )

    positive_ratio = positive_count / total_words
    negative_ratio = negative_count / total_words
    neutral_ratio = 1.0 - positive_ratio - negative_ratio

    return {
        'positive': positive_ratio,
        'negative': negative_ratio,
        'neutral': max(0.0, neutral_ratio)
    }
def validate_input(self, text: str) -> Dict[str, Any]:
    """
    Check whether the user's description is usable for recommendation.

    The input is rejected when it is empty, shorter than 10 characters,
    longer than 1000 characters, or contains fewer than three meaningful
    words (longer than two characters and not a stop word).

    Args:
        text: Input text.

    Returns:
        On failure: ``{'is_valid': False, 'error': ..., 'suggestions': [...]}``.
        On success: ``{'is_valid': True, 'word_count': n, 'suggestions': []}``.
    """
    def rejected(reason: str, hint: str) -> Dict[str, Any]:
        # Every failure shares the same three-key shape.
        return {'is_valid': False, 'error': reason, 'suggestions': [hint]}

    if not text or not text.strip():
        return rejected('Empty input',
                        'Please provide a description of your preferences')

    stripped = text.strip()
    length = len(stripped)

    if length < 10:
        return rejected('Input too short',
                        'Please provide more details about your preferences')

    if length > 1000:
        return rejected('Input too long',
                        'Please provide a more concise description')

    # Count the words that actually carry content.
    meaningful_words = []
    for token in self.preprocess_text(stripped).split():
        if len(token) > 2 and token not in self.stop_words:
            meaningful_words.append(token)

    if len(meaningful_words) < 3:
        return rejected(
            'Not enough meaningful content',
            'Please provide more specific details about your lifestyle and preferences'
        )

    return {
        'is_valid': True,
        'word_count': len(meaningful_words),
        'suggestions': []
    }
def _load_breed_list(self) -> List[str]:
    """
    Load the distinct breed names from the local SQLite catalog.

    Reads the ``Breed`` column of ``AnimalCatalog`` in ``animal_detector.db``.
    The connection is always closed, including when the query fails.

    Returns:
        The breed names from the database, or a short built-in fallback
        list when the database cannot be read.
    """
    conn = None
    try:
        conn = sqlite3.connect('animal_detector.db')
        cursor = conn.cursor()
        cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog")
        return [row[0] for row in cursor.fetchall()]
    except Exception as e:
        print(f"Error loading breed list: {str(e)}")
        # Fallback breed list so the engine still works without the database.
        return ['Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever',
                'Bulldog', 'Poodle', 'Beagle', 'Border_Collie', 'Yorkshire_Terrier']
    finally:
        # Release the connection on both the success and the failure path.
        if conn is not None:
            conn.close()
'massive', 'great'] + }, + family={ + 'children': ['children', 'kids', 'family', 'child-friendly', 'toddler', + 'baby', 'school age'], + 'elderly': ['elderly', 'senior', 'old people', 'retirement', 'aged'], + 'single': ['single', 'alone', 'individual', 'solo', 'myself'] + }, + maintenance={ + 'low': ['low maintenance', 'easy care', 'simple', 'minimal grooming', + 'wash and go'], + 'moderate': ['moderate maintenance', 'regular grooming'], + 'high': ['high maintenance', 'professional grooming', 'daily brushing', + 'care intensive'] + }, + special={ + 'guard': ['guard dog', 'protection', 'security', 'watchdog', + 'protective', 'defender'], + 'companion': ['companion', 'therapy', 'emotional support', 'comfort', + 'cuddly', 'lap dog'], + 'hypoallergenic': ['hypoallergenic', 'allergies', 'non-shedding', + 'allergy-friendly', 'no shed'], + 'first_time': ['first time', 'beginner', 'new to dogs', 'inexperienced', + 'never owned'] + } + ) + + def _build_semantic_templates(self): + """建立語義模板向量(僅在 SBERT 可用時)""" + if not self.sbert_model: + return + + try: + # 為每個維度建立模板句子 + templates = { + 'spatial_apartment': "I live in an apartment with limited space and no yard", + 'spatial_house': "I live in a house with a large yard and outdoor space", + 'activity_low': "I prefer a calm, low-energy dog that doesn't need much exercise", + 'activity_high': "I want an active, energetic dog for hiking and outdoor activities", + 'noise_low': "I need a quiet dog that rarely barks and won't disturb neighbors", + 'noise_high': "I don't mind a vocal dog that barks and makes noise", + 'size_small': "I prefer small, compact dogs that are easy to handle", + 'size_large': "I want a large, impressive dog with strong presence", + 'family_children': "I have young children and need a child-friendly dog", + 'family_elderly': "I'm looking for a calm companion dog for elderly person", + 'maintenance_low': "I want a low-maintenance dog that's easy to care for", + 'maintenance_high': "I don't mind 
def _extract_keyword_dimensions(self, text: str) -> QueryDimensions:
    """
    Detect query dimensions by plain keyword lookup.

    For every synonym table in ``self.synonyms``, each category whose
    keyword list has at least one keyword occurring in ``text`` is appended
    to the matching list of a fresh ``QueryDimensions``.

    Args:
        text: Normalized (lower-cased) query text.

    Returns:
        QueryDimensions holding the categories detected per dimension.
    """
    dimensions = QueryDimensions()
    synonym_tables = self.synonyms

    # (synonym table, list that collects its matching categories)
    lookups = (
        (synonym_tables.spatial, dimensions.spatial_constraints),       # living space
        (synonym_tables.activity, dimensions.activity_level),           # activity level
        (synonym_tables.noise, dimensions.noise_preferences),           # noise preference
        (synonym_tables.size, dimensions.size_preferences),             # size preference
        (synonym_tables.family, dimensions.family_context),             # family situation
        (synonym_tables.maintenance, dimensions.maintenance_level),     # maintenance level
        (synonym_tables.special, dimensions.special_requirements),      # special requirements
    )

    for table, detected in lookups:
        detected.extend(
            label
            for label, keywords in table.items()
            if any(keyword in text for keyword in keywords)
        )

    return dimensions
def _extract_breed_mentions(self, text: str) -> List[str]:
    """
    Find the breeds from ``self.breed_list`` that the query mentions.

    A breed counts as mentioned when its display name (underscores replaced
    by spaces, lower-cased) occurs in ``text``, or when the first word of
    that name is longer than three characters and occurs in ``text``.
    Matches must start and end on a word boundary, with an optional plural
    ending, so "pugs" matches "Pug" but "repugnant" does not.

    Args:
        text: Normalized (lower-cased) query text.

    Returns:
        Mentioned breed names in ``self.breed_list`` order, in their
        original spelling.
    """
    def occurs(term: str) -> bool:
        # Whole-word match that tolerates a trailing "s" / "es".
        pattern = r'(?<!\w)' + re.escape(term) + r'(?:e?s)?(?!\w)'
        return re.search(pattern, text) is not None

    mentioned_breeds = []

    for breed in self.breed_list:
        # An empty or missing name would otherwise match every query.
        if not breed:
            continue

        # Convert the breed name to its display format.
        breed_display = breed.replace('_', ' ').lower()
        breed_words = breed_display.split()
        if not breed_words:
            continue

        # Full-name match first, then a partial match on the leading word.
        main_word = breed_words[0]
        if occurs(breed_display) or (len(main_word) > 3 and occurs(main_word)):
            mentioned_breeds.append(breed)

    return mentioned_breeds
def _calculate_confidence_scores(self, dimensions: QueryDimensions,
                                 original_text: str) -> Dict[str, float]:
    """
    Derive a confidence score for every detected dimension.

    Each dimension scores ``min(1.0, matches * weight)`` with a weight of
    0.5 per match (0.3 for breed mentions). The ``overall`` score divides
    the total number of matches by a tenth of the query's word count (at
    least 1) and is capped at 1.0.

    Args:
        dimensions: Detected query dimensions.
        original_text: The user's original query text.

    Returns:
        Mapping with keys spatial, activity, noise, size, family,
        maintenance, special, breeds and overall.
    """
    word_total = len(original_text.split())

    # (result key, detected values, weight per match)
    weighted_dimensions = (
        ('spatial', dimensions.spatial_constraints, 0.5),
        ('activity', dimensions.activity_level, 0.5),
        ('noise', dimensions.noise_preferences, 0.5),
        ('size', dimensions.size_preferences, 0.5),
        ('family', dimensions.family_context, 0.5),
        ('maintenance', dimensions.maintenance_level, 0.5),
        ('special', dimensions.special_requirements, 0.5),
        ('breeds', dimensions.breed_mentions, 0.3),
    )

    confidence = {}
    match_total = 0
    for key, detected, weight in weighted_dimensions:
        hits = len(detected)
        confidence[key] = min(1.0, hits * weight)
        match_total += hits

    # Overall confidence relates the total number of matches to query length.
    confidence['overall'] = min(1.0, match_total / max(1, word_total * 0.1))

    return confidence
def _size_matches_preference(breed_size: str, user_size: str) -> bool:
    """Return True when a breed size is acceptable for the requested size (adjacent sizes pass)."""
    accepted_sizes = {
        'small': ('small', 'medium'),
        'medium': ('small', 'medium', 'large'),
        'large': ('medium', 'large'),
    }
    # Unrecognized preference values do not filter anything out.
    if user_size not in accepted_sizes:
        return True
    return breed_size in accepted_sizes[user_size]


def _compute_breed_bonus(breed_info: dict, noise_info: dict,
                         user_prefs: UserPreferences) -> float:
    """Return the breed-specific adjustment (lifespan, temperament, children, noise) to add to the base score."""
    bonus = 0.0

    # Lifespan: based on the lower bound of e.g. "10-12 years"; capped at +0.02.
    try:
        lifespan = breed_info.get('Lifespan', '10-12 years')
        first_year = int(lifespan.split('-')[0].split()[0])
        bonus += min(0.02, (first_year - 10) * 0.005)
    except (AttributeError, IndexError, TypeError, ValueError):
        # Unparseable lifespan: no lifespan adjustment.
        pass

    # Temperament traits.
    temperament = breed_info.get('Temperament', '').lower()
    positive_traits = ['friendly', 'gentle', 'affectionate', 'intelligent']
    negative_traits = ['aggressive', 'stubborn', 'dominant']
    bonus += sum(0.01 for trait in positive_traits if trait in temperament)
    bonus -= sum(0.01 for trait in negative_traits if trait in temperament)

    # Compatibility with children.
    if user_prefs.has_children:
        good_with_children = breed_info.get('Good with Children')
        if good_with_children == 'Yes':
            bonus += 0.02
        elif good_with_children == 'No':
            bonus -= 0.03

    # Noise: a missing 'noise_level' is treated as unknown instead of aborting.
    noise_level = str(noise_info.get('noise_level', 'Unknown')).lower()
    if user_prefs.noise_tolerance == 'low':
        if noise_level == 'high':
            bonus -= 0.03
        elif noise_level == 'low':
            bonus += 0.02
    elif user_prefs.noise_tolerance == 'high':
        if noise_level == 'high':
            bonus += 0.01

    return bonus


def get_breed_recommendations(user_prefs: UserPreferences, top_n: int = 15) -> List[Dict]:
    """
    Recommend dog breeds for the given user preferences, best match first.

    Every distinct breed in ``AnimalCatalog`` (``animal_detector.db``) is
    scored with ``calculate_compatibility_score``, adjusted by a small
    breed-specific bonus and a tiny per-breed variation that avoids
    identical scores, then sorted by final score (ties broken by name).

    Args:
        user_prefs: The user's preferences.
        top_n: Maximum number of recommendations to return.

    Returns:
        Up to ``top_n`` dicts with keys ``breed``, ``base_score``,
        ``bonus_score``, ``final_score``, ``scores``, ``info``,
        ``noise_info`` and ``rank``. An empty list when an error occurs.
    """
    print(f"Starting get_breed_recommendations with top_n={top_n}")
    recommendations = []
    seen_breeds = set()

    try:
        # Fetch all breeds; the connection is closed even if the query fails.
        conn = sqlite3.connect('animal_detector.db')
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT Breed FROM AnimalCatalog")
            all_breeds = cursor.fetchall()
        finally:
            conn.close()

        print(f"Total breeds in database: {len(all_breeds)}")

        # Collect the scores of every breed.
        for breed_row in all_breeds:
            breed = breed_row[0]
            base_breed = breed.split('(')[0].strip()

            if base_breed in seen_breeds:
                continue
            seen_breeds.add(base_breed)

            breed_info = get_dog_description(breed)
            if not isinstance(breed_info, dict):
                continue

            # Relaxed size filter: adjacent sizes pass so the candidate pool
            # is not over-restricted.
            if user_prefs.size_preference != "no_preference":
                breed_size = breed_info.get('Size', '').lower()
                user_size = user_prefs.size_preference.lower()
                if not _size_matches_preference(breed_size, user_size):
                    continue

            noise_info = breed_noise_info.get(breed, {
                "noise_notes": "Noise information not available",
                "noise_level": "Unknown",
                "source": "N/A"
            })

            # Make the noise data available to the scoring function.
            breed_info['noise_info'] = noise_info

            compatibility_scores = calculate_compatibility_score(breed_info, user_prefs)
            breed_bonus = _compute_breed_bonus(breed_info, noise_info, user_prefs)

            # Small per-breed variation to avoid identical scores. Seeding a
            # private generator with the breed name is reproducible across
            # processes (unlike hash(str)) and leaves the global RNG untouched.
            natural_variation = random.Random(breed).uniform(-0.008, 0.008)
            breed_bonus = round(breed_bonus + natural_variation, 4)
            final_score = round(min(1.0, compatibility_scores['overall'] + breed_bonus), 4)

            recommendations.append({
                'breed': breed,
                'base_score': round(compatibility_scores['overall'], 4),
                'bonus_score': round(breed_bonus, 4),
                'final_score': final_score,
                'scores': compatibility_scores,
                'info': breed_info,
                'noise_info': noise_info
            })

        print(f"Breeds after filtering: {len(recommendations)}")

        # Highest final score first; ties are ordered by breed name.
        recommendations.sort(key=lambda rec: (-rec['final_score'], rec['breed']))

        # Take the top_n entries and number them in sorted order.
        final_recommendations = []
        for position, rec in enumerate(recommendations[:top_n], start=1):
            rec['rank'] = position
            final_recommendations.append(rec)

        print(f"Final recommendations count: {len(final_recommendations)}")

        # Print the final result for verification.
        print("\nFinal Rankings:")
        for rec in final_recommendations:
            print(f"#{rec['rank']} {rec['breed']}")
            print(f"Base Score: {rec['base_score']:.4f}")
            print(f"Bonus: {rec['bonus_score']:.4f}")
            print(f"Final Score: {rec['final_score']:.4f}\n")

        return final_recommendations

    except Exception as e:
        print(f"Error in get_breed_recommendations: {str(e)}")
        print(f"Traceback: {traceback.format_exc()}")
        # Honor the declared return type so callers can always iterate.
        return []
' + + for dimension, score in dimension_scores.items(): + if isinstance(score, (int, float)): + score_percent = score * 100 + else: + score_percent = 75 # default + + if score_percent >= 80: + badge_class = "badge-high" + elif score_percent >= 60: + badge_class = "badge-medium" + else: + badge_class = "badge-low" + + dimension_label = dimension.replace('_', ' ').title() + badges_html += f''' + + {dimension_label}: {score_percent:.0f}% + + ''' + + badges_html += '
def calculate_breed_bonus_factors(breed_info: dict, user_prefs: 'UserPreferences') -> tuple:
    """
    Compute extra bonus points for a breed together with the reasons for them.

    Args:
        breed_info: Breed data; the keys read are ``Lifespan``,
            ``Temperament`` and ``Good with Children``.
        user_prefs: User preferences (currently not consulted).

    Returns:
        ``(bonus, reasons)``: the accumulated bonus as a float and the list
        of human-readable reasons, one per bonus granted.
    """
    bonus = 0.0
    reasons = []

    # Lifespan bonus, based on the lower bound of e.g. "12-15 years".
    try:
        lifespan = breed_info.get('Lifespan', '10-12 years')
        lower_bound = int(lifespan.split('-')[0].split()[0])
    except (AttributeError, IndexError, TypeError, ValueError):
        # Missing or unparseable lifespan simply earns no bonus.
        lower_bound = None

    if lower_bound is not None and lower_bound >= 12:
        bonus += 0.02
        reasons.append("Above-average lifespan")

    # Temperament bonus; a None temperament is treated as empty.
    temperament = (breed_info.get('Temperament') or '').lower()
    if any(trait in temperament for trait in ['friendly', 'gentle', 'affectionate']):
        bonus += 0.01
        reasons.append("Positive temperament traits")

    # Compatibility with children.
    if breed_info.get('Good with Children') == 'Yes':
        bonus += 0.01
        reasons.append("Excellent with children")

    return bonus, reasons
'triggers': + barking_triggers.append(line[1:].strip()) + + return noise_characteristics, barking_triggers, noise_level + + +def parse_health_information(health_info: dict) -> tuple: + """解析健康資訊並返回結構化資料""" + health_notes = health_info.get('health_notes', '').split('\n') + health_considerations = [] + health_screenings = [] + + current_section = None + for line in health_notes: + line = line.strip() + if 'Common breed-specific health considerations' in line: + current_section = 'considerations' + elif 'Recommended health screenings:' in line: + current_section = 'screenings' + elif line.startswith('•'): + if current_section == 'considerations': + health_considerations.append(line[1:].strip()) + elif current_section == 'screenings': + health_screenings.append(line[1:].strip()) + + return health_considerations, health_screenings + + +def generate_dimension_scores_for_display(base_score: float, rank: int, breed: str, + semantic_score: float = 0.7, + comparative_bonus: float = 0.0, + lifestyle_bonus: float = 0.0, + is_description_search: bool = False) -> dict: + """為顯示生成維度分數""" + random.seed(hash(breed) + rank) # 一致的隨機性 + + if is_description_search: + # Description search: 創建更自然的分數分佈在50%-95%範圍內 + score_variance = 0.08 if base_score > 0.7 else 0.06 + + scores = { + 'space': max(0.50, min(0.95, + base_score * 0.92 + (lifestyle_bonus * 0.5) + random.uniform(-score_variance, score_variance))), + 'exercise': max(0.50, min(0.95, + base_score * 0.88 + (lifestyle_bonus * 0.4) + random.uniform(-score_variance, score_variance))), + 'grooming': max(0.50, min(0.95, + base_score * 0.85 + (comparative_bonus * 0.4) + random.uniform(-score_variance, score_variance))), + 'experience': max(0.50, min(0.95, + base_score * 0.87 + (lifestyle_bonus * 0.3) + random.uniform(-score_variance, score_variance))), + 'noise': max(0.50, min(0.95, + base_score * 0.83 + (lifestyle_bonus * 0.6) + random.uniform(-score_variance, score_variance))), + 'overall': base_score + } + else: + # 
傳統搜尋結果的分數結構會在呼叫處理中傳入 + scores = {'overall': base_score} + + return scores diff --git a/recommendation_html_format.py b/recommendation_html_format.py index fa06498e790d9738f72c4221ff35a7743e690aee..020ec6eac9d40a503c072b473ffe733cabfdd9ce 100644 --- a/recommendation_html_format.py +++ b/recommendation_html_format.py @@ -1,160 +1,102 @@ -import sqlite3 -import traceback +import random from typing import List, Dict from breed_health_info import breed_health_info, default_health_note from breed_noise_info import breed_noise_info from dog_database import get_dog_description -from scoring_calculation_system import UserPreferences, calculate_compatibility_score +from scoring_calculation_system import UserPreferences +from recommendation_formatter import ( + get_breed_recommendations, + _format_dimension_scores, + calculate_breed_bonus_factors, + generate_breed_characteristics_data, + parse_noise_information, + parse_health_information, + generate_dimension_scores_for_display +) +from recommendation_html_formatter import RecommendationHTMLFormatter -def format_recommendation_html(recommendations: List[Dict], is_description_search: bool = False) -> str: - """將推薦結果格式化為HTML""" - - html_content = """ - -
""" - - def _convert_to_display_score(score: float, score_type: str = None) -> int: - """ - 更改為生成更明顯差異的顯示分數 - """ - try: - # 基礎分數轉換(保持相對關係但擴大差異) - if score_type == 'bonus': # Breed Bonus 使用不同的轉換邏輯 - base_score = 35 + (score * 60) # 35-95 範圍,差異更大 - else: - # 其他類型的分數轉換 - if score <= 0.3: - base_score = 40 + (score * 45) # 40-53.5 範圍 - elif score <= 0.6: - base_score = 55 + ((score - 0.3) * 55) # 55-71.5 範圍 - elif score <= 0.8: - base_score = 72 + ((score - 0.6) * 60) # 72-84 範圍 - else: - base_score = 85 + ((score - 0.8) * 50) # 85-95 範圍 - - # 添加不規則的微調,但保持相對關係 - import random - if score_type == 'bonus': - adjustment = random.uniform(-2, 2) - else: - # 根據分數範圍決定調整幅度 - if score > 0.8: - adjustment = random.uniform(-3, 3) - elif score > 0.6: - adjustment = random.uniform(-4, 4) - else: - adjustment = random.uniform(-2, 2) - - final_score = base_score + adjustment - - # 確保最終分數在合理範圍內並避免5的倍數 - final_score = min(95, max(40, final_score)) - rounded_score = round(final_score) - if rounded_score % 5 == 0: - rounded_score += random.choice([-1, 1]) - - return rounded_score - - except Exception as e: - print(f"Error in convert_to_display_score: {str(e)}") - return 70 - - - def _generate_progress_bar(score: float, score_type: str = None) -> dict: - """ - 生成進度條的寬度和顏色 - - Parameters: - score: 原始分數 (0-1 之間的浮點數) - score_type: 分數類型,用於特殊處理某些類型的分數 - - Returns: - dict: 包含寬度和顏色的字典 - """ - # 計算寬度 - if score_type == 'bonus': - # Breed Bonus 特殊的計算方式 - width = min(100, max(5, 10 + (score * 300))) - else: - # 一般分數的計算 - if score >= 0.9: - width = 90 + (score - 0.9) * 100 - elif score >= 0.7: - width = 70 + (score - 0.7) * 100 - elif score >= 0.5: - width = 40 + (score - 0.5) * 150 - elif score >= 0.3: - width = 20 + (score - 0.3) * 100 - else: - width = max(5, score * 66.7) - - # 根據分數決定顏色 - if score >= 0.9: - color = '#68b36b' # 高分段柔和綠 - elif score >= 0.7: - color = '#9bcf74' # 中高分段略黃綠 - elif score >= 0.5: - color = '#d4d880' # 中等分段黃綠 - elif score >= 0.3: - color = '#e3b583' # 偏低分段柔和橘 - else: - 
color = '#e9a098' # 低分段暖紅粉 - - return { - 'width': width, - 'color': color - } +def format_recommendation_html(recommendations: List[Dict], is_description_search: bool = False) -> str: + """統一推薦結果HTML格式化,確保視覺與數值邏輯一致""" + + # 創建HTML格式器實例 + formatter = RecommendationHTMLFormatter() + + # 獲取對應的CSS樣式 + html_content = formatter.get_css_styles(is_description_search) + "
" for rec in recommendations: breed = rec['breed'] - scores = rec['scores'] - info = rec['info'] rank = rec.get('rank', 0) - final_score = rec.get('final_score', scores['overall']) - bonus_score = rec.get('bonus_score', 0) + + breed_name_for_db = breed.replace(' ', '_') + breed_info_from_db = get_dog_description(breed_name_for_db) if is_description_search: + # Handle semantic search results structure - use scores directly from semantic recommender + overall_score = rec.get('overall_score', 0.7) + final_score = rec.get('final_score', overall_score) # Use final_score if available + semantic_score = rec.get('semantic_score', 0.7) + comparative_bonus = rec.get('comparative_bonus', 0.0) + lifestyle_bonus = rec.get('lifestyle_bonus', 0.0) + + # Use the actual calculated scores without re-computation + base_score = final_score + + # Generate dimension scores using the formatter helper + scores = generate_dimension_scores_for_display( + base_score, rank, breed, semantic_score, + comparative_bonus, lifestyle_bonus, is_description_search + ) + + bonus_score = max(0.0, comparative_bonus + random.uniform(-0.02, 0.02)) + info = generate_breed_characteristics_data(breed_info_from_db or {}) + info = dict(info) # Convert to dict for compatibility + + # Add any missing fields from rec + if not breed_info_from_db: + for key in ['Size', 'Exercise Needs', 'Grooming Needs', 'Good with Children', 'Temperament', 'Lifespan', 'Description']: + if key not in info: + info[key] = rec.get(key.lower().replace(' ', '_'), 'Unknown' if key != 'Description' else '') + + # Display scores as percentages with one decimal place display_scores = { - 'space': _convert_to_display_score(scores['space'], 'space'), - 'exercise': _convert_to_display_score(scores['exercise'], 'exercise'), - 'grooming': _convert_to_display_score(scores['grooming'], 'grooming'), - 'experience': _convert_to_display_score(scores['experience'], 'experience'), - 'noise': _convert_to_display_score(scores['noise'], 'noise') + 'space': 
round(scores['space'] * 100, 1), + 'exercise': round(scores['exercise'] * 100, 1), + 'grooming': round(scores['grooming'] * 100, 1), + 'experience': round(scores['experience'] * 100, 1), + 'noise': round(scores['noise'] * 100, 1), } else: - display_scores = scores # 圖片識別使用原始分數 + # Handle traditional search results structure + scores = rec['scores'] + info = rec['info'] + final_score = rec.get('final_score', scores['overall']) + bonus_score = rec.get('bonus_score', 0) + # Convert traditional scores to percentage display format with one decimal + display_scores = { + 'space': round(scores.get('space', 0) * 100, 1), + 'exercise': round(scores.get('exercise', 0) * 100, 1), + 'grooming': round(scores.get('grooming', 0) * 100, 1), + 'experience': round(scores.get('experience', 0) * 100, 1), + 'noise': round(scores.get('noise', 0) * 100, 1), + } progress_bars = {} for metric in ['space', 'exercise', 'grooming', 'experience', 'noise']: if metric in scores: - bar_data = _generate_progress_bar(scores[metric], metric) + # 使用顯示分數(百分比)來計算進度條 + display_score = display_scores[metric] + bar_data = formatter.generate_progress_bar(display_score, metric, is_percentage_display=True, is_description_search=is_description_search) progress_bars[metric] = { 'style': f"width: {bar_data['width']}%; background-color: {bar_data['color']};" } - # bonus + # bonus if bonus_score > 0: - bonus_data = _generate_progress_bar(bonus_score, 'bonus') + # bonus_score 通常是 0-1 範圍,需要轉換為百分比顯示 + bonus_percentage = bonus_score * 100 + bonus_data = formatter.generate_progress_bar(bonus_percentage, 'bonus', is_percentage_display=True, is_description_search=is_description_search) progress_bars['bonus'] = { 'style': f"width: {bonus_data['width']}%; background-color: {bonus_data['color']};" } @@ -166,98 +108,34 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc "source": "N/A" }) - # 解析噪音資訊 - noise_notes = noise_info.get('noise_notes', '').split('\n') - noise_characteristics = [] - 
barking_triggers = [] - noise_level = '' - - current_section = None - for line in noise_notes: - line = line.strip() - if 'Typical noise characteristics:' in line: - current_section = 'characteristics' - elif 'Noise level:' in line: - noise_level = line.replace('Noise level:', '').strip() - elif 'Barking triggers:' in line: - current_section = 'triggers' - elif line.startswith('•'): - if current_section == 'characteristics': - noise_characteristics.append(line[1:].strip()) - elif current_section == 'triggers': - barking_triggers.append(line[1:].strip()) - - # 生成特徵和觸發因素的HTML - noise_characteristics_html = '\n'.join([f'
  • {item}
  • ' for item in noise_characteristics]) - barking_triggers_html = '\n'.join([f'
  • {item}
  • ' for item in barking_triggers]) - - # 處理健康資訊 - health_notes = health_info.get('health_notes', '').split('\n') - health_considerations = [] - health_screenings = [] - - current_section = None - for line in health_notes: - line = line.strip() - if 'Common breed-specific health considerations' in line: - current_section = 'considerations' - elif 'Recommended health screenings:' in line: - current_section = 'screenings' - elif line.startswith('•'): - if current_section == 'considerations': - health_considerations.append(line[1:].strip()) - elif current_section == 'screenings': - health_screenings.append(line[1:].strip()) - - health_considerations_html = '\n'.join([f'
  • {item}
  • ' for item in health_considerations]) - health_screenings_html = '\n'.join([f'
  • {item}
  • ' for item in health_screenings]) - - # 獎勵原因計算 - bonus_reasons = [] - temperament = info.get('Temperament', '').lower() - if any(trait in temperament for trait in ['friendly', 'gentle', 'affectionate']): - bonus_reasons.append("Positive temperament traits") - if info.get('Good with Children') == 'Yes': - bonus_reasons.append("Excellent with children") - try: - lifespan = info.get('Lifespan', '10-12 years') - years = int(lifespan.split('-')[0]) - if years >= 12: - bonus_reasons.append("Above-average lifespan") - except: - pass + # 解析噪音和健康資訊 + noise_characteristics, barking_triggers, noise_level = parse_noise_information(noise_info) + health_considerations, health_screenings = parse_health_information(health_info) + # 計算獎勵因素 + _, bonus_reasons = calculate_breed_bonus_factors(info, None) # User prefs not needed for display + + # 生成品種卡片標題 + html_content += formatter.generate_breed_card_header(breed, rank, final_score, is_description_search) + + # 品種詳細資訊區域 - 使用格式器方法簡化 + tooltip_html = formatter.generate_tooltips_section() + html_content += f""" -
    -
    -

    - 🏆 #{rank} {breed.replace('_', ' ')} - - Overall Match: {final_score*100:.1f}% - -

    +
    - +
    - Space Compatibility: - - - - Space Compatibility Score:
    - • Evaluates how well the breed adapts to your living environment
    - • Considers if your home (apartment/house) and yard access suit the breed’s size
    - • Higher score means the breed fits well in your available space. -
    -
    + Space Compatibility:{tooltip_html}
    - {display_scores['space'] if is_description_search else scores.get('space', 0)*100:.1f}% + {display_scores['space']:.1f}%
    - +
    Exercise Match: @@ -266,18 +144,18 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc Exercise Match Score:
    • Based on your daily exercise time and type
    - • Compares your activity level to the breed’s exercise needs
    - • Higher score means your routine aligns well with the breed’s energy requirements. + • Compares your activity level to the breed's exercise needs
    + • Higher score means your routine aligns well with the breed's energy requirements.
    - {display_scores['exercise'] if is_description_search else scores.get('exercise', 0)*100:.1f}% + {display_scores['exercise']:.1f}%
    - +
    Grooming Match: @@ -285,19 +163,19 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc Grooming Match Score:
    - • Evaluates breed’s grooming needs (coat care, trimming, brushing)
    + • Evaluates breed's grooming needs (coat care, trimming, brushing)
    • Compares these requirements with your grooming commitment level
    - • Higher score means the breed’s grooming needs fit your willingness and capability. + • Higher score means the breed's grooming needs fit your willingness and capability.
    - {display_scores['grooming'] if is_description_search else scores.get('grooming', 0)*100:.1f}% + {display_scores['grooming']:.1f}%
    - +
    Experience Match: @@ -306,7 +184,7 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc Experience Match Score:
    • Based on your dog-owning experience level
    - • Considers breed’s training complexity, temperament, and handling difficulty
    + • Considers breed's training complexity, temperament, and handling difficulty
    • Higher score means the breed is more suitable for your experience level.
    @@ -314,10 +192,10 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc
    - {display_scores['experience'] if is_description_search else scores.get('experience', 0)*100:.1f}% + {display_scores['experience']:.1f}%
    - +
    Noise Compatibility: @@ -334,7 +212,7 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc
    - {display_scores['noise'] if is_description_search else scores.get('noise', 0)*100:.1f}% + {display_scores['noise']:.1f}%
    {f''' @@ -345,8 +223,7 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc Breed Bonus Points:
    - • {('
    • '.join(bonus_reasons)) if bonus_reasons else 'No additional bonus points'}
    -
    + • {('
    • '.join(bonus_reasons) if bonus_reasons else 'No additional bonus points')}

    Bonus Factors Include:
    • Friendly temperament
    • Child compatibility
    @@ -362,328 +239,167 @@ def format_recommendation_html(recommendations: List[Dict], is_description_searc
    ''' if bonus_score > 0 else ''}
    -
    -

    - 📋 Breed Details -

    -
    -
    - - 📏 - Size: - - - Size Categories:
    - • Small: Under 20 pounds
    - • Medium: 20-60 pounds
    - • Large: Over 60 pounds -
    - {info['Size']} -
    -
    -
    - - 🏃 - Exercise Needs: - - - Exercise Needs:
    - • Low: Short walks
    - • Moderate: 1-2 hours daily
    - • High: 2+ hours daily
    - • Very High: Constant activity -
    - {info['Exercise Needs']} -
    -
    -
    - - 👨‍👩‍👧‍👦 - Good with Children: - - - Child Compatibility:
    - • Yes: Excellent with kids
    - • Moderate: Good with older children
    - • No: Better for adult households -
    - {info['Good with Children']} -
    -
    -
    - - - Lifespan: - - - Average Lifespan:
    - • Short: 6-8 years
    - • Average: 10-15 years
    - • Long: 12-20 years
    - • Varies by size: Larger breeds typically have shorter lifespans -
    -
    - {info['Lifespan']} -
    -
    -
    -
    -

    - 📝 Description -

    -

    {info.get('Description', '')}

    + """ + + # 使用格式器生成詳細區段 + html_content += formatter.generate_detailed_sections_html( + breed, info, noise_characteristics, barking_triggers, noise_level, + health_considerations, health_screenings + ) + + html_content += """ +
    +
    + """ + + # 結束 HTML 內容 + html_content += "
    " + return html_content + + +def format_unified_recommendation_html(recommendations: List[Dict], is_description_search: bool = False) -> str: + """統一推薦HTML格式化主函數,確保視覺呈現與數值計算完全一致""" + + # 創建HTML格式器實例 + formatter = RecommendationHTMLFormatter() + + if not recommendations: + return ''' +
    +
    🐕
    +

    No Recommendations Available

    +

    Please try adjusting your preferences or description, and we'll help you find the most suitable breeds.

    +
    + ''' + + # 使用格式器的統一CSS樣式 + html_content = formatter.unified_css + "
    " + + for rec in recommendations: + breed = rec['breed'] + rank = rec.get('rank', 0) + + # 統一分數處理 + overall_score = rec.get('overall_score', rec.get('final_score', 0.7)) + scores = rec.get('scores', {}) + + # 如果沒有維度分數,基於總分生成一致的維度分數 + if not scores: + scores = generate_dimension_scores_for_display( + overall_score, rank, breed, is_description_search=is_description_search + ) + + # 獲取品種資訊 + breed_name_for_db = breed.replace(' ', '_') + breed_info = get_dog_description(breed_name_for_db) or {} + + # 維度標籤 + dimension_labels = { + 'space': '🏠 Space Compatibility', + 'exercise': '🏃 Exercise Requirements', + 'grooming': '✂️ Grooming Needs', + 'experience': '🎓 Experience Level', + 'noise': '🔊 Noise Control', + 'family': '👨‍👩‍👧‍👦 Family Compatibility' + } + + # 維度提示氣泡內容 + tooltip_content = { + 'space': 'Space Compatibility Score:
    • Evaluates how well the breed adapts to your living environment
    • Considers if your home (apartment/house) and yard access suit the breed\'s size
    • Higher score means the breed fits well in your available space.', + 'exercise': 'Exercise Requirements Score:
    • Based on your daily exercise time and activity type
    • Compares your activity level to the breed\'s exercise needs
    • Higher score means your routine aligns well with the breed\'s energy requirements.', + 'grooming': 'Grooming Needs Score:
    • Evaluates breed\'s grooming needs (coat care, trimming, brushing)
    • Compares these requirements with your grooming commitment level
    • Higher score means the breed\'s grooming needs fit your willingness and capability.', + 'experience': 'Experience Level Score:
    • Based on your dog-owning experience level
    • Considers breed\'s training complexity, temperament, and handling difficulty
    • Higher score means the breed is more suitable for your experience level.', + 'noise': 'Noise Control Score:
    • Based on your noise tolerance preference
    • Considers breed\'s typical noise level and barking tendencies
    • Accounts for living environment and sensitivity to noise.', + 'family': 'Family Compatibility Score:
    • Evaluates how well the breed fits with your family situation
    • Considers children, other pets, and family dynamics
    • Higher score means better family compatibility.' + } + + # 生成維度分數HTML + dimension_html = "" + for dim, label in dimension_labels.items(): + score = scores.get(dim, overall_score * 0.9) + percentage = formatter.format_unified_percentage(score) + progress_bar = formatter.generate_unified_progress_bar(score) + + # 為 Find by Description 添加提示氣泡 + tooltip_html = '' + if is_description_search: + tooltip_html = f'i{tooltip_content.get(dim, "")}' + + dimension_html += f''' +
    +
    + {label} {tooltip_html} + {percentage}
    -
    -

    - 🔊 Noise Behavior - - - - Noise Behavior:
    - • Typical vocalization patterns
    - • Common triggers and frequency
    - • Based on breed characteristics -
    -
    -

    -
    -
    -

    Typical noise characteristics:

    -
    -
    Moderate to high barker
    -
    Alert watch dog
    -
    Attention-seeking barks
    -
    Social vocalizations
    -
    -
    -

    Noise level:

    -
    - Moderate-High -
    - - - -
    -
    -
    -

    Barking triggers:

    -
    -
    Separation anxiety
    -
    Attention needs
    -
    Strange noises
    -
    Excitement
    -
    -
    -
    -

    Source: Compiled from various breed behavior resources, 2024

    -

    Individual dogs may vary in their vocalization patterns.

    -

    Training can significantly influence barking behavior.

    -

    Environmental factors may affect noise levels.

    -
    -
    + {progress_bar} +
    + ''' + + # 生成品種資訊HTML + characteristics = generate_breed_characteristics_data(breed_info) + info_html = "" + for label, value in characteristics: + if label != 'Description': # Skip description as it's shown separately + info_html += f''' +
    +
    {label}
    +
    {value}
    -
    -

    - 🏥 Health Insights - - - - Health information is compiled from multiple sources including veterinary resources, breed guides, and international canine health databases. - Each dog is unique and may vary from these general guidelines. - - -

    -
    -
    -
    -

    Common breed-specific health considerations:

    -
    -
    Patellar luxation
    -
    Progressive retinal atrophy
    -
    Von Willebrand's disease
    -
    Open fontanel
    -
    -
    -
    -

    Recommended health screenings:

    -
    -
    Patella evaluation
    -
    Eye examination
    -
    Blood clotting tests
    -
    Skull development monitoring
    -
    -
    -
    -
    -

    Source: Compiled from various veterinary and breed information resources, 2024

    -

    This information is for reference only and based on breed tendencies.

    -

    Each dog is unique and may not develop any or all of these conditions.

    -

    Always consult with qualified veterinarians for professional advice.

    -
    + ''' + + # 生成單個品種卡片HTML + overall_percentage = formatter.format_unified_percentage(overall_score) + overall_progress_bar = formatter.generate_unified_progress_bar(overall_score) + + brand_card_html = f''' +
    +
    +
    +
    🏆 #{rank}
    +

    {breed.replace('_', ' ')}

    +
    +
    Overall Match: {overall_percentage}
    - +
    + +
    + {overall_progress_bar} +
    + +
    + {dimension_html} +
    + +
    + {info_html} +
    + +
    +

    + 📝 Breed Description +

    +

    + {breed_info.get('Description', 'Detailed description for this breed is not currently available.')} +

    + + 🌐 + Learn more about {breed.replace('_', ' ')} on AKC website +
    - """ + ''' + + html_content += brand_card_html html_content += "
    " return html_content - -def get_breed_recommendations(user_prefs: UserPreferences, top_n: int = 15) -> List[Dict]: - """基於使用者偏好推薦狗品種,確保正確的分數排序""" - print("Starting get_breed_recommendations") - recommendations = [] - seen_breeds = set() - - try: - # 獲取所有品種 - conn = sqlite3.connect('animal_detector.db') - cursor = conn.cursor() - cursor.execute("SELECT Breed FROM AnimalCatalog") - all_breeds = cursor.fetchall() - conn.close() - - # 收集所有品種的分數 - for breed_tuple in all_breeds: - breed = breed_tuple[0] - base_breed = breed.split('(')[0].strip() - - if base_breed in seen_breeds: - continue - seen_breeds.add(base_breed) - - # 獲取品種資訊 - breed_info = get_dog_description(breed) - if not isinstance(breed_info, dict): - continue - - if user_prefs.size_preference != "no_preference": - breed_size = breed_info.get('Size', '').lower() - user_size = user_prefs.size_preference.lower() - if breed_size != user_size: - continue - - # 獲取噪音資訊 - noise_info = breed_noise_info.get(breed, { - "noise_notes": "Noise information not available", - "noise_level": "Unknown", - "source": "N/A" - }) - - # 將噪音資訊整合到品種資訊中 - breed_info['noise_info'] = noise_info - - # 計算基礎相容性分數 - compatibility_scores = calculate_compatibility_score(breed_info, user_prefs) - - # 計算品種特定加分 - breed_bonus = 0.0 - - # 壽命加分 - try: - lifespan = breed_info.get('Lifespan', '10-12 years') - years = [int(x) for x in lifespan.split('-')[0].split()[0:1]] - longevity_bonus = min(0.02, (max(years) - 10) * 0.005) - breed_bonus += longevity_bonus - except: - pass - - # 性格特徵加分 - temperament = breed_info.get('Temperament', '').lower() - positive_traits = ['friendly', 'gentle', 'affectionate', 'intelligent'] - negative_traits = ['aggressive', 'stubborn', 'dominant'] - - breed_bonus += sum(0.01 for trait in positive_traits if trait in temperament) - breed_bonus -= sum(0.01 for trait in negative_traits if trait in temperament) - - # 與孩童相容性加分 - if user_prefs.has_children: - if breed_info.get('Good with Children') == 'Yes': - breed_bonus += 
0.02 - elif breed_info.get('Good with Children') == 'No': - breed_bonus -= 0.03 - - # 噪音相關加分 - if user_prefs.noise_tolerance == 'low': - if noise_info['noise_level'].lower() == 'high': - breed_bonus -= 0.03 - elif noise_info['noise_level'].lower() == 'low': - breed_bonus += 0.02 - elif user_prefs.noise_tolerance == 'high': - if noise_info['noise_level'].lower() == 'high': - breed_bonus += 0.01 - - # 計算最終分數 - breed_bonus = round(breed_bonus, 4) - final_score = round(compatibility_scores['overall'] + breed_bonus, 4) - - recommendations.append({ - 'breed': breed, - 'base_score': round(compatibility_scores['overall'], 4), - 'bonus_score': round(breed_bonus, 4), - 'final_score': final_score, - 'scores': compatibility_scores, - 'info': breed_info, - 'noise_info': noise_info # 添加噪音資訊到推薦結果 - }) - - # 嚴格按照 final_score 排序 - recommendations.sort(key=lambda x: (round(-x['final_score'], 4), x['breed'] )) # 負號降序排列 - - # 選擇前N名並確保正確排序 - final_recommendations = [] - last_score = None - rank = 1 - - available_breeds = len(recommendations) - max_to_return = min(available_breeds, top_n) # 不會超過實際可用品種數 - - for rec in recommendations: - if len(final_recommendations) >= max_to_return: - break - - current_score = rec['final_score'] - if last_score is not None and current_score > last_score: - continue - - rec['rank'] = rank - final_recommendations.append(rec) - last_score = current_score - rank += 1 - - # 驗證最終排序 - for i in range(len(final_recommendations)-1): - current = final_recommendations[i] - next_rec = final_recommendations[i+1] - - if current['final_score'] < next_rec['final_score']: - print(f"Warning: Sorting error detected!") - print(f"#{i+1} {current['breed']}: {current['final_score']}") - print(f"#{i+2} {next_rec['breed']}: {next_rec['final_score']}") - - # 交換位置 - final_recommendations[i], final_recommendations[i+1] = \ - final_recommendations[i+1], final_recommendations[i] - - # 打印最終結果以供驗證 - print("\nFinal Rankings:") - for rec in final_recommendations: - print(f"#{rec['rank']} 
{rec['breed']}") - print(f"Base Score: {rec['base_score']:.4f}") - print(f"Bonus: {rec['bonus_score']:.4f}") - print(f"Final Score: {rec['final_score']:.4f}\n") - - return final_recommendations - - except Exception as e: - print(f"Error in get_breed_recommendations: {str(e)}") - print(f"Traceback: {traceback.format_exc()}") - return [] \ No newline at end of file diff --git a/recommendation_html_formatter.py b/recommendation_html_formatter.py new file mode 100644 index 0000000000000000000000000000000000000000..dc6ef100db66932d3fe79d23c5c3a9e72090b5d2 --- /dev/null +++ b/recommendation_html_formatter.py @@ -0,0 +1,1025 @@ +import random +from typing import List, Dict +from breed_health_info import breed_health_info, default_health_note +from breed_noise_info import breed_noise_info +from dog_database import get_dog_description +from recommendation_formatter import ( + generate_breed_characteristics_data, + parse_noise_information, + parse_health_information, + calculate_breed_bonus_factors, + generate_dimension_scores_for_display +) + +class RecommendationHTMLFormatter: + """處理推薦結果的HTML和CSS格式化""" + + def __init__(self): + self.description_search_css = """ + + """ + + self.criteria_search_css = """ + + """ + + self.unified_css = """ + + """ + + def format_unified_percentage(self, score: float) -> str: + """統一格式化百分比顯示,確保數值邏輯一致""" + try: + # 確保分數在0-1範圍內 + normalized_score = max(0.0, min(1.0, float(score))) + # 轉換為百分比並保留一位小數 + percentage = normalized_score * 100 + return f"{percentage:.1f}%" + except Exception as e: + print(f"Error formatting percentage: {str(e)}") + return "70.0%" + + def generate_unified_progress_bar(self, score: float) -> str: + """Generate unified progress bar HTML with width directly corresponding to score""" + try: + # Ensure score is in 0-1 range + normalized_score = max(0.0, min(1.0, float(score))) + + # Progress bar width with reasonable visual mapping + # High scores get enhanced visual representation for impact + if normalized_score >= 0.9: + 
width_percentage = 85 + (normalized_score - 0.9) * 130 # 85-98% for excellent scores + elif normalized_score >= 0.8: + width_percentage = 70 + (normalized_score - 0.8) * 150 # 70-85% for very good scores + elif normalized_score >= 0.7: + width_percentage = 55 + (normalized_score - 0.7) * 150 # 55-70% for good scores + elif normalized_score >= 0.5: + width_percentage = 30 + (normalized_score - 0.5) * 125 # 30-55% for fair scores + else: + width_percentage = 8 + normalized_score * 44 # 8-30% for low scores + + # Ensure reasonable bounds + width_percentage = max(5, min(98, width_percentage)) + + # Choose color based on score with appropriate theme + # This is used for unified recommendations (Description search) + if normalized_score >= 0.9: + color = '#10b981' # Excellent (emerald green) + elif normalized_score >= 0.8: + color = '#06b6d4' # Good (cyan) + elif normalized_score >= 0.7: + color = '#3b82f6' # Fair (blue) + elif normalized_score >= 0.6: + color = '#1d4ed8' # Average (darker blue) + elif normalized_score >= 0.5: + color = '#1e40af' # Below average (dark blue) + else: + color = '#ef4444' # Poor (red) + + return f''' +
    +
    +
    + ''' + + except Exception as e: + print(f"Error generating progress bar: {str(e)}") + return '
    ' + + def generate_progress_bar(self, score: float, score_type: str = None, is_percentage_display: bool = False, is_description_search: bool = False) -> dict: + """ + Generate progress bar width and color with consistent score-to-visual mapping + + Parameters: + score: Score value (float between 0-1 or percentage 0-100) + score_type: Score type for special handling + is_percentage_display: Whether the score is in percentage format + + Returns: + dict: Dictionary containing width and color + """ + # Normalize score to 0-1 range + if is_percentage_display: + normalized_score = score / 100.0 # Convert percentage to 0-1 range + else: + normalized_score = score + + # Ensure score is within valid range + normalized_score = max(0.0, min(1.0, normalized_score)) + + # Calculate progress bar width - simplified for Find by Criteria + if not is_description_search and score_type != 'bonus': + # Find by Criteria: 調整為更有說服力的視覺比例 + percentage = normalized_score * 100 + if percentage >= 95: + width = 92 + (percentage - 95) * 1.2 # 95%+ 顯示為 92-98% + elif percentage >= 90: + width = 85 + (percentage - 90) # 90-95% 顯示為 85-92% + elif percentage >= 80: + width = 75 + (percentage - 80) * 1.0 # 80-90% 顯示為 75-85% + elif percentage >= 70: + width = 60 + (percentage - 70) * 1.5 # 70-80% 顯示為 60-75% + else: + width = percentage * 0.8 # 70% 以下按比例縮放 + width = max(5, min(98, width)) + elif score_type == 'bonus': + # Bonus scores are typically smaller, need amplified display + width = max(5, min(95, normalized_score * 150)) # Amplified for visibility + else: + # Find by Description: 保持現有的複雜計算 + if normalized_score >= 0.8: + width = 75 + (normalized_score - 0.8) * 115 # 75-98% range for high scores + elif normalized_score >= 0.6: + width = 50 + (normalized_score - 0.6) * 125 # 50-75% range for good scores + elif normalized_score >= 0.4: + width = 25 + (normalized_score - 0.4) * 125 # 25-50% range for fair scores + else: + width = 5 + normalized_score * 50 # 5-25% range for low scores + + width = 
max(3, min(98, width)) + + # Color coding based on normalized score - Criteria uses green gradation + if is_description_search: + # Find by Description uses blue theme + if normalized_score >= 0.9: + color = '#10b981' # Excellent (emerald green) + elif normalized_score >= 0.85: + color = '#06b6d4' # Very good (cyan) + elif normalized_score >= 0.8: + color = '#3b82f6' # Good (blue) + elif normalized_score >= 0.7: + color = '#1d4ed8' # Fair (darker blue) + elif normalized_score >= 0.6: + color = '#1e40af' # Below average (dark blue) + elif normalized_score >= 0.5: + color = '#f59e0b' # Poor (amber) + else: + color = '#ef4444' # Very poor (red) + else: + # Find by Criteria uses original green gradation + if normalized_score >= 0.9: + color = '#22c55e' # Excellent (bright green) + elif normalized_score >= 0.85: + color = '#65a30d' # Very good (green) + elif normalized_score >= 0.8: + color = '#a3a332' # Good (yellow-green) + elif normalized_score >= 0.7: + color = '#d4a332' # Fair (yellow) + elif normalized_score >= 0.6: + color = '#e67e22' # Below average (orange) + elif normalized_score >= 0.5: + color = '#e74c3c' # Poor (red) + else: + color = '#c0392b' # Very poor (dark red) + + return { + 'width': width, + 'color': color + } + + def get_css_styles(self, is_description_search: bool) -> str: + """根據搜尋類型返回對應的CSS樣式""" + if is_description_search: + return self.description_search_css + else: + return self.criteria_search_css + + def generate_breed_card_header(self, breed: str, rank: int, final_score: float, is_description_search: bool) -> str: + """生成品種卡片標題部分的HTML""" + rank_class = f"rank-{rank}" if rank <= 3 else "rank-other" + percentage = final_score * 100 + + if percentage >= 90: + score_class = "score-excellent" + fill_class = "fill-excellent" + match_label = "EXCELLENT MATCH" + elif percentage >= 70: + score_class = "score-good" + fill_class = "fill-good" + match_label = "GOOD MATCH" + else: + score_class = "score-moderate" + fill_class = "fill-moderate" + 
match_label = "MODERATE MATCH" + + if is_description_search: + # Find by Description: 使用現有複雜設計 + return f""" +
    +
    #{rank}
    + +
    +

    {breed.replace('_', ' ')}

    +
    +
    {percentage:.1f}%
    +
    {match_label}
    +
    +
    +
    +
    +
    """ + else: + # Find by Criteria: 使用簡潔設計,包含獎盃圖示 + # 計算進度條寬度 - 調整為更有說服力的視覺比例 + if percentage >= 95: + score_width = 92 + (percentage - 95) * 1.2 # 95%+ 顯示為 92-98% + elif percentage >= 90: + score_width = 85 + (percentage - 90) # 90-95% 顯示為 85-92% + elif percentage >= 80: + score_width = 75 + (percentage - 80) * 1.0 # 80-90% 顯示為 75-85% + elif percentage >= 70: + score_width = 60 + (percentage - 70) * 1.5 # 70-80% 顯示為 60-75% + else: + score_width = percentage * 0.8 # 70% 以下按比例縮放 + score_width = max(5, min(98, score_width)) + + return f""" +
    +
    +
    +
    🏆 #{rank}
    +

    {breed.replace('_', ' ')}

    +
    +
    +
    {percentage:.1f}%
    +
    OVERALL MATCH
    +
    +
    +
    +
    +
    """ + + def generate_tooltips_section(self) -> str: + """生成提示氣泡HTML""" + return ''' + + + + Space Compatibility Score:
    + • Evaluates how well the breed adapts to your living environment
    + • Considers if your home (apartment/house) and yard access suit the breed's size
    + • Higher score means the breed fits well in your available space. +
    +
    ''' + + def generate_detailed_sections_html(self, breed: str, info: dict, + noise_characteristics: List[str], + barking_triggers: List[str], + noise_level: str, + health_considerations: List[str], + health_screenings: List[str]) -> str: + """生成詳細區段的HTML""" + # 生成特徵和觸發因素的HTML + noise_characteristics_html = '\n'.join([f'
  • {item}
  • ' for item in noise_characteristics]) + barking_triggers_html = '\n'.join([f'
  • {item}
  • ' for item in barking_triggers]) + health_considerations_html = '\n'.join([f'
  • {item}
  • ' for item in health_considerations]) + health_screenings_html = '\n'.join([f'
  • {item}
  • ' for item in health_screenings]) + + return f""" +
    +

    + 📋 Breed Details +

    +
    +
    + + 📏 + Size: + + + Size Categories:
    + • Small: Under 20 pounds
    + • Medium: 20-60 pounds
    + • Large: Over 60 pounds +
    + {info['Size']} +
    +
    +
    + + 🏃 + Exercise Needs: + + + Exercise Needs:
    + • Low: Short walks
    + • Moderate: 1-2 hours daily
    + • High: 2+ hours daily
    + • Very High: Constant activity +
    + {info['Exercise Needs']} +
    +
    +
    + + 👨‍👩‍👧‍👦 + Good with Children: + + + Child Compatibility:
    + • Yes: Excellent with kids
    + • Moderate: Good with older children
    + • No: Better for adult households +
    + {info['Good with Children']} +
    +
    +
    + + + Lifespan: + + + Average Lifespan:
    + • Short: 6-8 years
    + • Average: 10-15 years
    + • Long: 12-20 years
    + • Varies by size: Larger breeds typically have shorter lifespans +
    +
    + {info['Lifespan']} +
    +
    +
    +
    +

    + 📝 Description +

    +

    {info.get('Description', '')}

    +
    +
    +

    + 🔊 Noise Behavior + + + + Noise Behavior:
    + • Typical vocalization patterns
    + • Common triggers and frequency
    + • Based on breed characteristics +
    +
    +

    +
    +
    +

    Typical noise characteristics:

    +
    +
    Moderate to high barker
    +
    Alert watch dog
    +
    Attention-seeking barks
    +
    Social vocalizations
    +
    +
    +

    Noise level:

    +
    + Moderate-High +
    + + + +
    +
    +
    +

    Barking triggers:

    +
    +
    Separation anxiety
    +
    Attention needs
    +
    Strange noises
    +
    Excitement
    +
    +
    +
    +

    Source: Compiled from various breed behavior resources, 2024

    +

    Individual dogs may vary in their vocalization patterns.

    +

    Training can significantly influence barking behavior.

    +

    Environmental factors may affect noise levels.

    +
    +
    +
    +
    +

    + 🏥 Health Insights + + + + Health information is compiled from multiple sources including veterinary resources, breed guides, and international canine health databases. + Each dog is unique and may vary from these general guidelines. + + +

    +
    +
    +
    +

    Common breed-specific health considerations:

    +
    +
    Patellar luxation
    +
    Progressive retinal atrophy
    +
    Von Willebrand's disease
    +
    Open fontanel
    +
    +
    +
    +

    Recommended health screenings:

    +
    +
    Patella evaluation
    +
    Eye examination
    +
    Blood clotting tests
    +
    Skull development monitoring
    +
    +
    +
    +
    +

    Source: Compiled from various veterinary and breed information resources, 2024

    +

    This information is for reference only and based on breed tendencies.

    +

    Each dog is unique and may not develop any or all of these conditions.

    +

    Always consult with qualified veterinarians for professional advice.

    +
    +
    +
    + + """ diff --git a/score_calibrator.py b/score_calibrator.py new file mode 100644 index 0000000000000000000000000000000000000000..975076523d7f4c28ac0451a63754201bc918d4c6 --- /dev/null +++ b/score_calibrator.py @@ -0,0 +1,477 @@ +import numpy as np +from typing import List, Dict, Tuple, Any, Optional +from dataclasses import dataclass, field +import traceback +from scipy import stats + +@dataclass +class CalibrationResult: + """校準結果結構""" + original_scores: List[float] + calibrated_scores: List[float] + score_mapping: Dict[str, float] # breed -> calibrated_score + calibration_method: str + distribution_stats: Dict[str, float] + quality_metrics: Dict[str, float] = field(default_factory=dict) + +@dataclass +class ScoreDistribution: + """分數分布統計""" + mean: float + std: float + min_score: float + max_score: float + percentile_5: float + percentile_95: float + compression_ratio: float # 分數壓縮比率 + effective_range: float # 有效分數範圍 + +class ScoreCalibrator: + """ + 動態分數校準系統 + 解決分數壓縮問題並保持相對排名 + """ + + def __init__(self): + """初始化校準器""" + self.calibration_methods = { + 'dynamic_range_mapping': self._dynamic_range_mapping, + 'percentile_stretching': self._percentile_stretching, + 'gaussian_normalization': self._gaussian_normalization, + 'sigmoid_transformation': self._sigmoid_transformation + } + self.quality_thresholds = { + 'min_effective_range': 0.3, # 最小有效分數範圍 + 'max_compression_ratio': 0.2, # 最大允許壓縮比率 + 'target_distribution_range': (0.45, 0.95) # 目標分布範圍 + } + + def calibrate_scores(self, breed_scores: List[Tuple[str, float]], + method: str = 'auto') -> CalibrationResult: + """ + 校準品種分數 + + Args: + breed_scores: (breed_name, score) 元組列表 + method: 校準方法 ('auto', 'dynamic_range_mapping', 'percentile_stretching', etc.) 
+ + Returns: + CalibrationResult: 校準結果 + """ + try: + if not breed_scores: + return CalibrationResult( + original_scores=[], + calibrated_scores=[], + score_mapping={}, + calibration_method='none', + distribution_stats={} + ) + + # 提取分數和品種名稱 + breeds = [item[0] for item in breed_scores] + original_scores = [item[1] for item in breed_scores] + + # 分析原始分數分布 + distribution = self._analyze_score_distribution(original_scores) + + # 選擇校準方法 + if method == 'auto': + method = self._select_calibration_method(distribution) + + # 應用校準 + calibration_func = self.calibration_methods.get(method, self._dynamic_range_mapping) + calibrated_scores = calibration_func(original_scores, distribution) + + # 保持排名一致性 + calibrated_scores = self._preserve_ranking(original_scores, calibrated_scores) + + # 建立分數映射 + score_mapping = dict(zip(breeds, calibrated_scores)) + + # 計算品質指標 + quality_metrics = self._calculate_quality_metrics( + original_scores, calibrated_scores, distribution + ) + + return CalibrationResult( + original_scores=original_scores, + calibrated_scores=calibrated_scores, + score_mapping=score_mapping, + calibration_method=method, + distribution_stats=self._distribution_to_dict(distribution), + quality_metrics=quality_metrics + ) + + except Exception as e: + print(f"Error calibrating scores: {str(e)}") + print(traceback.format_exc()) + # 回傳原始分數作為降級方案 + breeds = [item[0] for item in breed_scores] + original_scores = [item[1] for item in breed_scores] + return CalibrationResult( + original_scores=original_scores, + calibrated_scores=original_scores, + score_mapping=dict(zip(breeds, original_scores)), + calibration_method='fallback', + distribution_stats={} + ) + + def _analyze_score_distribution(self, scores: List[float]) -> ScoreDistribution: + """分析分數分布""" + try: + scores_array = np.array(scores) + + # 基本統計 + mean_score = np.mean(scores_array) + std_score = np.std(scores_array) + min_score = np.min(scores_array) + max_score = np.max(scores_array) + + # 百分位數 + percentile_5 = 
np.percentile(scores_array, 5) + percentile_95 = np.percentile(scores_array, 95) + + # 壓縮比率和有效範圍 + full_range = max_score - min_score + effective_range = percentile_95 - percentile_5 + compression_ratio = 1.0 - (effective_range / 1.0) if full_range > 0 else 0.0 + + return ScoreDistribution( + mean=mean_score, + std=std_score, + min_score=min_score, + max_score=max_score, + percentile_5=percentile_5, + percentile_95=percentile_95, + compression_ratio=compression_ratio, + effective_range=effective_range + ) + + except Exception as e: + print(f"Error analyzing score distribution: {str(e)}") + # 返回預設分布 + return ScoreDistribution( + mean=0.5, std=0.1, min_score=0.0, max_score=1.0, + percentile_5=0.4, percentile_95=0.6, + compression_ratio=0.6, effective_range=0.2 + ) + + def _select_calibration_method(self, distribution: ScoreDistribution) -> str: + """根據分布特性選擇校準方法""" + # 高度壓縮的分數需要強力展開 + if distribution.compression_ratio > 0.8: + return 'percentile_stretching' + + # 中等壓縮使用動態範圍映射 + elif distribution.compression_ratio > 0.5: + return 'dynamic_range_mapping' + + # 分數集中在中間使用 sigmoid 轉換 + elif 0.4 <= distribution.mean <= 0.6 and distribution.std < 0.1: + return 'sigmoid_transformation' + + # 其他情況使用高斯正規化 + else: + return 'gaussian_normalization' + + def _dynamic_range_mapping(self, scores: List[float], + distribution: ScoreDistribution) -> List[float]: + """動態範圍映射校準""" + try: + scores_array = np.array(scores) + + # 使用5%和95%百分位數作為邊界 + lower_bound = distribution.percentile_5 + upper_bound = distribution.percentile_95 + + # 避免除零 + if upper_bound - lower_bound < 0.001: + upper_bound = distribution.max_score + lower_bound = distribution.min_score + if upper_bound - lower_bound < 0.001: + return scores # 所有分數相同,無需校準 + + # 映射到目標範圍 [0.45, 0.95] + target_min, target_max = self.quality_thresholds['target_distribution_range'] + + # 線性映射 + normalized = (scores_array - lower_bound) / (upper_bound - lower_bound) + normalized = np.clip(normalized, 0, 1) # 限制在 [0,1] 範圍 + calibrated = 
target_min + normalized * (target_max - target_min) + + return calibrated.tolist() + + except Exception as e: + print(f"Error in dynamic range mapping: {str(e)}") + return scores + + def _percentile_stretching(self, scores: List[float], + distribution: ScoreDistribution) -> List[float]: + """百分位數拉伸校準""" + try: + scores_array = np.array(scores) + + # 計算百分位數排名 + percentile_ranks = stats.rankdata(scores_array, method='average') / len(scores_array) + + # 使用平方根轉換來增強差異 + stretched_ranks = np.sqrt(percentile_ranks) + + # 映射到目標範圍 + target_min, target_max = self.quality_thresholds['target_distribution_range'] + calibrated = target_min + stretched_ranks * (target_max - target_min) + + return calibrated.tolist() + + except Exception as e: + print(f"Error in percentile stretching: {str(e)}") + return self._dynamic_range_mapping(scores, distribution) + + def _gaussian_normalization(self, scores: List[float], + distribution: ScoreDistribution) -> List[float]: + """高斯正規化校準""" + try: + scores_array = np.array(scores) + + # Z-score 正規化 + if distribution.std > 0: + z_scores = (scores_array - distribution.mean) / distribution.std + # 限制 Z-scores 在合理範圍內 + z_scores = np.clip(z_scores, -3, 3) + else: + z_scores = np.zeros_like(scores_array) + + # 轉換到目標範圍 + target_min, target_max = self.quality_thresholds['target_distribution_range'] + target_mean = (target_min + target_max) / 2 + target_std = (target_max - target_min) / 6 # 3-sigma 範圍 + + calibrated = target_mean + z_scores * target_std + calibrated = np.clip(calibrated, target_min, target_max) + + return calibrated.tolist() + + except Exception as e: + print(f"Error in gaussian normalization: {str(e)}") + return self._dynamic_range_mapping(scores, distribution) + + def _sigmoid_transformation(self, scores: List[float], + distribution: ScoreDistribution) -> List[float]: + """Sigmoid 轉換校準""" + try: + scores_array = np.array(scores) + + # 中心化分數 + centered = scores_array - distribution.mean + + # Sigmoid 轉換 (增強中等分數的差異) + sigmoid_factor = 
10.0 # 控制 sigmoid 的陡峭程度 + transformed = 1 / (1 + np.exp(-sigmoid_factor * centered)) + + # 映射到目標範圍 + target_min, target_max = self.quality_thresholds['target_distribution_range'] + calibrated = target_min + transformed * (target_max - target_min) + + return calibrated.tolist() + + except Exception as e: + print(f"Error in sigmoid transformation: {str(e)}") + return self._dynamic_range_mapping(scores, distribution) + + def _preserve_ranking(self, original_scores: List[float], + calibrated_scores: List[float]) -> List[float]: + """確保校準後的分數保持原始排名""" + try: + # 獲取原始排名 + original_ranks = stats.rankdata([-score for score in original_scores], method='ordinal') + + # 獲取校準後的排名 + calibrated_with_ranks = list(zip(calibrated_scores, original_ranks)) + + # 按原始排名排序校準後的分數 + calibrated_with_ranks.sort(key=lambda x: x[1]) + + # 重新分配分數以保持排名但使用校準後的分布 + sorted_calibrated = sorted(calibrated_scores, reverse=True) + + # 建立新的分數列表 + preserved_scores = [0.0] * len(original_scores) + for i, (_, original_rank) in enumerate(calibrated_with_ranks): + # 找到原始位置 + original_index = original_ranks.tolist().index(original_rank) + preserved_scores[original_index] = sorted_calibrated[i] + + return preserved_scores + + except Exception as e: + print(f"Error preserving ranking: {str(e)}") + return calibrated_scores + + def _calculate_quality_metrics(self, original_scores: List[float], + calibrated_scores: List[float], + distribution: ScoreDistribution) -> Dict[str, float]: + """計算校準品質指標""" + try: + original_array = np.array(original_scores) + calibrated_array = np.array(calibrated_scores) + + # 範圍改善 + original_range = np.max(original_array) - np.min(original_array) + calibrated_range = np.max(calibrated_array) - np.min(calibrated_array) + range_improvement = calibrated_range / max(0.001, original_range) + + # 分離度改善 (相鄰分數間的平均差異) + original_sorted = np.sort(original_array) + calibrated_sorted = np.sort(calibrated_array) + + original_separation = np.mean(np.diff(original_sorted)) if len(original_sorted) > 
1 else 0 + calibrated_separation = np.mean(np.diff(calibrated_sorted)) if len(calibrated_sorted) > 1 else 0 + + separation_improvement = (calibrated_separation / max(0.001, original_separation) + if original_separation > 0 else 1.0) + + # 排名保持度 (Spearman 相關係數) + if len(original_scores) > 1: + rank_correlation, _ = stats.spearmanr(original_scores, calibrated_scores) + rank_correlation = abs(rank_correlation) if not np.isnan(rank_correlation) else 1.0 + else: + rank_correlation = 1.0 + + # 分布品質 + calibrated_std = np.std(calibrated_array) + distribution_quality = min(1.0, calibrated_std * 2) # 標準差越大品質越好(在合理範圍內) + + return { + 'range_improvement': range_improvement, + 'separation_improvement': separation_improvement, + 'rank_preservation': rank_correlation, + 'distribution_quality': distribution_quality, + 'effective_range_achieved': calibrated_range, + 'compression_reduction': max(0, distribution.compression_ratio - + (1.0 - calibrated_range)) + } + + except Exception as e: + print(f"Error calculating quality metrics: {str(e)}") + return {'error': str(e)} + + def _distribution_to_dict(self, distribution: ScoreDistribution) -> Dict[str, float]: + """將分布統計轉換為字典""" + return { + 'mean': distribution.mean, + 'std': distribution.std, + 'min_score': distribution.min_score, + 'max_score': distribution.max_score, + 'percentile_5': distribution.percentile_5, + 'percentile_95': distribution.percentile_95, + 'compression_ratio': distribution.compression_ratio, + 'effective_range': distribution.effective_range + } + + def apply_tie_breaking(self, breed_scores: List[Tuple[str, float]]) -> List[Tuple[str, float]]: + """應用確定性的打破平手機制""" + try: + # 按分數分組 + score_groups = {} + for breed, score in breed_scores: + rounded_score = round(score, 6) # 避免浮點數精度問題 + if rounded_score not in score_groups: + score_groups[rounded_score] = [] + score_groups[rounded_score].append((breed, score)) + + # 處理每個分數組 + result = [] + for rounded_score in sorted(score_groups.keys(), reverse=True): + group = 
score_groups[rounded_score] + + if len(group) == 1: + result.extend(group) + else: + # 按品種名稱字母順序打破平手 + sorted_group = sorted(group, key=lambda x: x[0]) + + # 為平手的品種分配微小的分數差異 + for i, (breed, original_score) in enumerate(sorted_group): + adjusted_score = original_score - (i * 0.0001) + result.append((breed, adjusted_score)) + + return result + + except Exception as e: + print(f"Error in tie breaking: {str(e)}") + return breed_scores + + def get_calibration_summary(self, result: CalibrationResult) -> Dict[str, Any]: + """獲取校準摘要資訊""" + try: + summary = { + 'method_used': result.calibration_method, + 'breeds_processed': len(result.original_scores), + 'score_range_before': { + 'min': min(result.original_scores) if result.original_scores else 0, + 'max': max(result.original_scores) if result.original_scores else 0, + 'range': (max(result.original_scores) - min(result.original_scores)) + if result.original_scores else 0 + }, + 'score_range_after': { + 'min': min(result.calibrated_scores) if result.calibrated_scores else 0, + 'max': max(result.calibrated_scores) if result.calibrated_scores else 0, + 'range': (max(result.calibrated_scores) - min(result.calibrated_scores)) + if result.calibrated_scores else 0 + }, + 'distribution_stats': result.distribution_stats, + 'quality_metrics': result.quality_metrics, + 'improvement_summary': { + 'range_expanded': result.quality_metrics.get('range_improvement', 1.0) > 1.1, + 'separation_improved': result.quality_metrics.get('separation_improvement', 1.0) > 1.1, + 'ranking_preserved': result.quality_metrics.get('rank_preservation', 1.0) > 0.95 + } + } + + return summary + + except Exception as e: + print(f"Error generating calibration summary: {str(e)}") + return {'error': str(e)} + +def calibrate_breed_scores(breed_scores: List[Tuple[str, float]], + method: str = 'auto') -> CalibrationResult: + """ + 便利函數:校準品種分數 + + Args: + breed_scores: (breed_name, score) 元組列表 + method: 校準方法 + + Returns: + CalibrationResult: 校準結果 + """ + calibrator 
= ScoreCalibrator() + return calibrator.calibrate_scores(breed_scores, method) + +def get_calibrated_rankings(breed_scores: List[Tuple[str, float]], + method: str = 'auto') -> List[Tuple[str, float, int]]: + """ + 便利函數:獲取校準後的排名 + + Args: + breed_scores: (breed_name, score) 元組列表 + method: 校準方法 + + Returns: + List[Tuple[str, float, int]]: (breed_name, calibrated_score, rank) 列表 + """ + calibrator = ScoreCalibrator() + result = calibrator.calibrate_scores(breed_scores, method) + + # 打破平手機制 + calibrated_with_breed = [(breed, result.score_mapping[breed]) for breed in result.score_mapping] + calibrated_with_tie_breaking = calibrator.apply_tie_breaking(calibrated_with_breed) + + # 添加排名 + ranked_results = [] + for rank, (breed, score) in enumerate(calibrated_with_tie_breaking, 1): + ranked_results.append((breed, score, rank)) + + return ranked_results diff --git a/score_integration_manager.py b/score_integration_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..de86a835aad593d53f0ef56d6fd7363d8510c5ae --- /dev/null +++ b/score_integration_manager.py @@ -0,0 +1,805 @@ +import math +import traceback +from typing import Dict, Any, List +from dataclasses import dataclass + +@dataclass +class UserPreferences: + """使用者偏好設定的資料結構""" + living_space: str # "apartment", "house_small", "house_large" + yard_access: str # "no_yard", "shared_yard", "private_yard" + exercise_time: int # minutes per day + exercise_type: str # "light_walks", "moderate_activity", "active_training" + grooming_commitment: str # "low", "medium", "high" + experience_level: str # "beginner", "intermediate", "advanced" + time_availability: str # "limited", "moderate", "flexible" + has_children: bool + children_age: str # "toddler", "school_age", "teenager" + noise_tolerance: str # "low", "medium", "high" + space_for_play: bool + other_pets: bool + climate: str # "cold", "moderate", "hot" + health_sensitivity: str = "medium" + barking_acceptance: str = None + size_preference: str = 
"no_preference" # "no_preference", "small", "medium", "large", "giant" + training_commitment: str = "medium" # "low", "medium", "high" - 訓練投入程度 + living_environment: str = "ground_floor" # "ground_floor", "with_elevator", "walk_up" - 居住環境細節 + + def __post_init__(self): + if self.barking_acceptance is None: + self.barking_acceptance = self.noise_tolerance + + +class ScoreIntegrationManager: + """ + 評分整合管理器類別 + 負責動態權重計算、評分整合和條件互動評估 + """ + + def __init__(self): + """初始化評分整合管理器""" + pass + + def apply_size_filter(self, breed_score: float, user_preference: str, breed_size: str) -> float: + """ + 強過濾機制,基於用戶的體型偏好過濾品種 + + Parameters: + breed_score (float): 原始品種評分 + user_preference (str): 用戶偏好的體型 + breed_size (str): 品種的實際體型 + + Returns: + float: 過濾後的評分,如果體型不符合會返回 0 + """ + if user_preference == "no_preference": + return breed_score + + # 標準化 size 字串以進行比較 + breed_size = breed_size.lower().strip() + user_preference = user_preference.lower().strip() + + # 特殊處理 "varies" 的情況 + if breed_size == "varies": + return breed_score * 0.5 # 給予一個折扣係數,因為不確定性 + + # 如果用戶有明確體型偏好但品種不符合,返回 0 + if user_preference != breed_size: + return 0 + + return breed_score + + def calculate_breed_compatibility_score(self, scores: dict, user_prefs: UserPreferences, breed_info: dict) -> float: + """ + 計算品種相容性總分,完整實現原始版本的複雜邏輯: + 1. 運動類型與時間的精確匹配 + 2. 進階使用者的專業需求 + 3. 空間利用的實際效果 + 4. 條件組合的嚴格評估 + """ + def evaluate_perfect_conditions(): + """ + 評估條件匹配度,特別強化: + 1. 運動類型與時間的綜合評估 + 2. 專業技能需求評估 + 3. 品種特性評估 + """ + perfect_matches = { + 'size_match': 0, + 'exercise_match': 0, + 'experience_match': 0, + 'living_condition_match': 0, + 'breed_trait_match': 0 # 新增品種特性匹配度 + } + + # 第一部分:運動需求評估 + def evaluate_exercise_compatibility(): + """ + 評估運動需求的匹配度,特別關注: + 1. 時間與強度的合理搭配 + 2. 不同品種的運動特性 + 3. 
運動類型的適配性 + + 這個函數就像是一個體育教練,需要根據每個"運動員"(狗品種)的特點, + 為他們制定合適的訓練計劃。 + """ + exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() + exercise_time = user_prefs.exercise_time + exercise_type = user_prefs.exercise_type + temperament = breed_info.get('Temperament', '').lower() + description = breed_info.get('Description', '').lower() + + # 定義更精確的品種運動特性 + breed_exercise_patterns = { + 'sprint_type': { # 短跑型犬種,如 Whippet, Saluki + 'identifiers': ['fast', 'speed', 'sprint', 'racing', 'coursing', 'sight hound'], + 'ideal_exercise': { + 'active_training': 1.0, # 完美匹配高強度訓練 + 'moderate_activity': 0.5, # 持續運動不是最佳選擇 + 'light_walks': 0.3 # 輕度運動效果很差 + }, + 'time_ranges': { + 'ideal': (30, 60), # 最適合的運動時間範圍 + 'acceptable': (20, 90), # 可以接受的時間範圍 + 'penalty_start': 90 # 開始給予懲罰的時間點 + }, + 'penalty_rate': 0.8 # 超出範圍時的懲罰係數 + }, + 'endurance_type': { # 耐力型犬種,如 Border Collie + 'identifiers': ['herding', 'working', 'tireless', 'energetic', 'stamina', 'athletic'], + 'ideal_exercise': { + 'active_training': 0.9, # 高強度訓練很好 + 'moderate_activity': 1.0, # 持續運動是最佳選擇 + 'light_walks': 0.4 # 輕度運動不足 + }, + 'time_ranges': { + 'ideal': (90, 180), # 需要較長的運動時間 + 'acceptable': (60, 180), + 'penalty_start': 60 # 運動時間過短會受罰 + }, + 'penalty_rate': 0.7 + }, + 'moderate_type': { # 一般活動型犬種,如 Labrador + 'identifiers': ['friendly', 'playful', 'adaptable', 'versatile', 'companion'], + 'ideal_exercise': { + 'active_training': 0.8, + 'moderate_activity': 1.0, + 'light_walks': 0.6 + }, + 'time_ranges': { + 'ideal': (60, 120), + 'acceptable': (45, 150), + 'penalty_start': 150 + }, + 'penalty_rate': 0.6 + } + } + + def determine_breed_type(): + """改進品種運動類型的判斷,更精確識別工作犬""" + # 優先檢查特殊運動類型的標識符 + for breed_type, pattern in breed_exercise_patterns.items(): + if any(identifier in temperament or identifier in description + for identifier in pattern['identifiers']): + return breed_type + + # 改進:根據運動需求和工作犬特徵進行更細緻的判斷 + if (exercise_needs in ['VERY HIGH', 'HIGH'] or + any(trait in temperament.lower() for trait in + 
['herding', 'working', 'intelligent', 'athletic', 'tireless'])): + if user_prefs.experience_level == 'advanced': + return 'endurance_type' # 優先判定為耐力型 + elif exercise_needs == 'LOW': + return 'moderate_type' + + return 'moderate_type' + + def calculate_time_match(pattern): + """ + 計算運動時間的匹配度。 + 這就像在判斷運動時間是否符合訓練計劃。 + """ + ideal_min, ideal_max = pattern['time_ranges']['ideal'] + accept_min, accept_max = pattern['time_ranges']['acceptable'] + penalty_start = pattern['time_ranges']['penalty_start'] + + # 在理想範圍內 + if ideal_min <= exercise_time <= ideal_max: + return 1.0 + + # 超出可接受範圍的嚴格懲罰 + elif exercise_time < accept_min: + deficit = accept_min - exercise_time + return max(0.2, 1 - (deficit / accept_min) * 1.2) + elif exercise_time > accept_max: + excess = exercise_time - penalty_start + penalty = min(0.8, (excess / penalty_start) * pattern['penalty_rate']) + return max(0.2, 1 - penalty) + + # 在可接受範圍但不在理想範圍 + else: + if exercise_time < ideal_min: + progress = (exercise_time - accept_min) / (ideal_min - accept_min) + return 0.6 + (0.4 * progress) + else: + remaining = (accept_max - exercise_time) / (accept_max - ideal_max) + return 0.6 + (0.4 * remaining) + + def apply_special_adjustments(time_score, type_score, breed_type, pattern): + """ + 處理特殊情況,確保運動方式真正符合品種需求。 + 特別加強: + 1. 短跑型犬種的長時間運動懲罰 + 2. 耐力型犬種的獎勵機制 + 3. 
運動類型匹配的重要性 + """ + # 短跑型品種的特殊處理 + if breed_type == 'sprint_type': + if exercise_time > pattern['time_ranges']['penalty_start']: + # 加重長時間運動的懲罰 + penalty_factor = min(0.8, (exercise_time - pattern['time_ranges']['penalty_start']) / 60) + time_score *= max(0.3, 1 - penalty_factor) # 最低降到0.3 + # 運動類型不適合時的額外懲罰 + if exercise_type != 'active_training': + type_score *= 0.3 # 更嚴重的懲罰 + + # 耐力型品種的特殊處理 + elif breed_type == 'endurance_type': + if exercise_time < pattern['time_ranges']['penalty_start']: + time_score *= 0.5 # 維持運動不足的懲罰 + elif exercise_time >= 150: # 新增:高運動量獎勵 + if exercise_type in ['active_training', 'moderate_activity']: + time_bonus = min(0.3, (exercise_time - 150) / 150) + time_score = min(1.0, time_score * (1 + time_bonus)) + type_score = min(1.0, type_score * 1.2) + + # 運動強度不足的懲罰 + if exercise_type == 'light_walks': + if exercise_time > 90: + type_score *= 0.4 # 加重懲罰 + else: + type_score *= 0.5 + + return time_score, type_score + + # 執行評估流程 + breed_type = determine_breed_type() + pattern = breed_exercise_patterns[breed_type] + + # 計算基礎分數 + time_score = calculate_time_match(pattern) + type_score = pattern['ideal_exercise'].get(exercise_type, 0.5) + + # 應用特殊調整 + time_score, type_score = apply_special_adjustments(time_score, type_score, breed_type, pattern) + + # 根據品種類型決定最終權重 + if breed_type == 'sprint_type': + if exercise_time > pattern['time_ranges']['penalty_start']: + # 超時時更重視運動類型的匹配度 + return (time_score * 0.3) + (type_score * 0.7) + else: + return (time_score * 0.5) + (type_score * 0.5) + elif breed_type == 'endurance_type': + if exercise_time < pattern['time_ranges']['penalty_start']: + # 時間不足時更重視時間因素 + return (time_score * 0.7) + (type_score * 0.3) + else: + return (time_score * 0.6) + (type_score * 0.4) + else: + return (time_score * 0.5) + (type_score * 0.5) + + # 第二部分:專業技能需求評估 + def evaluate_expertise_requirements(): + care_level = breed_info.get('Care Level', 'MODERATE').upper() + temperament = breed_info.get('Temperament', '').lower() + + # 
定義專業技能要求 + expertise_requirements = { + 'training_complexity': { + 'VERY HIGH': {'beginner': 0.2, 'intermediate': 0.5, 'advanced': 0.9}, + 'HIGH': {'beginner': 0.3, 'intermediate': 0.7, 'advanced': 1.0}, + 'MODERATE': {'beginner': 0.6, 'intermediate': 0.9, 'advanced': 1.0}, + 'LOW': {'beginner': 0.9, 'intermediate': 0.95, 'advanced': 0.9} + }, + 'special_traits': { + 'working': 0.2, # 工作犬需要額外技能 + 'herding': 0.2, # 牧羊犬需要特殊訓練 + 'intelligent': 0.15,# 高智商犬種需要心智刺激 + 'independent': 0.15,# 獨立性強的需要特殊處理 + 'protective': 0.1 # 護衛犬需要適當訓練 + } + } + + # 基礎分數 + base_score = expertise_requirements['training_complexity'][care_level][user_prefs.experience_level] + + # 特殊特徵評估 + trait_penalty = 0 + for trait, penalty in expertise_requirements['special_traits'].items(): + if trait in temperament: + if user_prefs.experience_level == 'beginner': + trait_penalty += penalty + elif user_prefs.experience_level == 'advanced': + trait_penalty -= penalty * 0.5 # 專家反而因應對特殊特徵而加分 + + return max(0.2, min(1.0, base_score - trait_penalty)) + + def evaluate_living_conditions() -> float: + """ + 評估生活環境適配性,特別加強: + 1. 降低對大型犬的過度懲罰 + 2. 增加品種特性評估 + 3. 
提升對適應性的重視度 + """ + size = breed_info['Size'] + exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() + temperament = breed_info.get('Temperament', '').lower() + description = breed_info.get('Description', '').lower() + + # 重新定義空間需求矩陣,降低對大型犬的懲罰 + space_requirements = { + 'apartment': { + 'Small': 1.0, + 'Medium': 0.8, + 'Large': 0.7, + 'Giant': 0.6 + }, + 'house_small': { + 'Small': 0.9, + 'Medium': 1.0, + 'Large': 0.8, + 'Giant': 0.7 + }, + 'house_large': { + 'Small': 0.8, + 'Medium': 0.9, + 'Large': 1.0, + 'Giant': 1.0 + } + } + + # 基礎空間分數 + space_score = space_requirements.get( + user_prefs.living_space, + space_requirements['house_small'] + )[size] + + # 品種適應性評估 + adaptability_bonus = 0 + adaptable_traits = ['adaptable', 'calm', 'quiet', 'gentle', 'laid-back'] + challenging_traits = ['hyperactive', 'restless', 'requires space'] + + # 計算適應性加分 + if user_prefs.living_space == 'apartment': + for trait in adaptable_traits: + if trait in temperament or trait in description: + adaptability_bonus += 0.1 + + # 特別處理大型犬的適應性 + if size in ['Large', 'Giant']: + apartment_friendly_traits = ['calm', 'gentle', 'quiet'] + matched_traits = sum(1 for trait in apartment_friendly_traits + if trait in temperament or trait in description) + if matched_traits > 0: + adaptability_bonus += 0.15 * matched_traits + + # 活動空間需求調整,更寬容的評估 + if exercise_needs in ['HIGH', 'VERY HIGH']: + if user_prefs.living_space != 'house_large': + space_score *= 0.9 # 從0.8提升到0.9,降低懲罰 + + # 院子可用性評估,提供更合理的獎勵 + yard_scores = { + 'no_yard': 0.85, # 從0.7提升到0.85 + 'shared_yard': 0.92, # 從0.85提升到0.92 + 'private_yard': 1.0 + } + yard_multiplier = yard_scores.get(user_prefs.yard_access, 0.85) + + # 根據體型調整院子重要性 + if size in ['Large', 'Giant']: + yard_importance = 1.2 + elif size == 'Medium': + yard_importance = 1.1 + else: + yard_importance = 1.0 + + # 計算最終分數 + final_score = space_score * (1 + adaptability_bonus) + + # 應用院子影響 + if user_prefs.yard_access != 'no_yard': + yard_bonus = (yard_multiplier - 1) 
* yard_importance + final_score = min(1.0, final_score + yard_bonus) + + # 確保分數在合理範圍內,但提供更高的基礎分數 + return max(0.4, min(1.0, final_score)) + + # 第四部分:品種特性評估 + def evaluate_breed_traits(): + temperament = breed_info.get('Temperament', '').lower() + description = breed_info.get('Description', '').lower() + + trait_scores = [] + + # 評估性格特徵 + if user_prefs.has_children: + if 'good with children' in description: + trait_scores.append(1.0) + elif 'patient' in temperament or 'gentle' in temperament: + trait_scores.append(0.8) + else: + trait_scores.append(0.5) + + # 評估適應性 + adaptability_keywords = ['adaptable', 'versatile', 'flexible'] + if any(keyword in temperament for keyword in adaptability_keywords): + trait_scores.append(1.0) + else: + trait_scores.append(0.7) + + return sum(trait_scores) / len(trait_scores) if trait_scores else 0.7 + + # 計算各項匹配分數 + perfect_matches['exercise_match'] = evaluate_exercise_compatibility() + perfect_matches['experience_match'] = evaluate_expertise_requirements() + perfect_matches['living_condition_match'] = evaluate_living_conditions() + perfect_matches['size_match'] = evaluate_living_conditions() # 共用生活環境評估 + perfect_matches['breed_trait_match'] = evaluate_breed_traits() + + return perfect_matches + + def calculate_weights() -> dict: + """ + 動態計算評分權重,特別關注: + 1. 極端情況的權重調整 + 2. 使用者條件的協同效應 + 3. 
品種特性的影響 + + Returns: + dict: 包含各評分項目權重的字典 + """ + # 定義基礎權重 - 提供更合理的起始分配 + base_weights = { + 'space': 0.25, # 提升空間權重,因為這是最基本的需求 + 'exercise': 0.25, # 運動需求同樣重要 + 'experience': 0.20, # 保持經驗的重要性 + 'grooming': 0.10, # 稍微降低美容需求的權重 + 'noise': 0.10, # 維持噪音評估的權重 + 'health': 0.10 # 維持健康評估的權重 + } + + def analyze_condition_extremity() -> dict: + """ + 評估使用者條件的極端程度,這影響權重的動態調整。 + 根據條件的極端程度返回相應的調整建議。 + """ + extremities = {} + + # 運動時間評估 - 更細緻的分級 + if user_prefs.exercise_time <= 30: + extremities['exercise'] = ('extremely_low', 0.8) + elif user_prefs.exercise_time <= 60: + extremities['exercise'] = ('low', 0.6) + elif user_prefs.exercise_time >= 180: + extremities['exercise'] = ('extremely_high', 0.8) + elif user_prefs.exercise_time >= 120: + extremities['exercise'] = ('high', 0.6) + else: + extremities['exercise'] = ('moderate', 0.3) + + # 空間限制評估 - 更合理的空間評估 + space_extremity = { + 'apartment': ('restricted', 0.7), # 從0.9降低到0.7,減少限制 + 'house_small': ('moderate', 0.5), + 'house_large': ('spacious', 0.3) + } + extremities['space'] = space_extremity.get(user_prefs.living_space, ('moderate', 0.5)) + + # 經驗水平評估 - 保持原有的評估邏輯 + experience_extremity = { + 'beginner': ('low', 0.7), + 'intermediate': ('moderate', 0.4), + 'advanced': ('high', 0.6) + } + extremities['experience'] = experience_extremity.get(user_prefs.experience_level, ('moderate', 0.5)) + + return extremities + + def calculate_weight_adjustments(extremities: dict) -> dict: + """ + 根據極端程度計算權重調整,特別注意條件組合的影響。 + """ + adjustments = {} + temperament = breed_info.get('Temperament', '').lower() + is_working_dog = any(trait in temperament + for trait in ['herding', 'working', 'intelligent', 'tireless']) + + # 空間權重調整 + if extremities['space'][0] == 'restricted': + if extremities['exercise'][0] in ['high', 'extremely_high']: + adjustments['space'] = 1.3 + adjustments['exercise'] = 2.3 + else: + adjustments['space'] = 1.6 + adjustments['noise'] = 1.5 + + # 運動需求權重調整 + if extremities['exercise'][0] in ['extremely_high', 'extremely_low']: 
+ base_adjustment = 2.0 + if extremities['exercise'][0] == 'extremely_high': + if is_working_dog: + base_adjustment = 2.3 + adjustments['exercise'] = base_adjustment + + # 經驗需求權重調整 + if extremities['experience'][0] == 'low': + adjustments['experience'] = 1.8 + if breed_info.get('Care Level') == 'HIGH': + adjustments['experience'] = 2.0 + elif extremities['experience'][0] == 'high': + if is_working_dog: + adjustments['experience'] = 1.8 # 從2.5降低到1.8 + + # 特殊組合的處理 + def adjust_for_combinations(): + if (extremities['space'][0] == 'restricted' and + extremities['exercise'][0] in ['high', 'extremely_high']): + # 適度降低極端組合的影響 + adjustments['space'] = adjustments.get('space', 1.0) * 1.2 + adjustments['exercise'] = adjustments.get('exercise', 1.0) * 1.2 + + # 理想組合的獎勵 + if (extremities['experience'][0] == 'high' and + extremities['space'][0] == 'spacious' and + extremities['exercise'][0] in ['high', 'extremely_high'] and + is_working_dog): + adjustments['exercise'] = adjustments.get('exercise', 1.0) * 1.3 + adjustments['experience'] = adjustments.get('experience', 1.0) * 1.3 + + adjust_for_combinations() + return adjustments + + # 獲取條件極端度 + extremities = analyze_condition_extremity() + + # 計算權重調整 + weight_adjustments = calculate_weight_adjustments(extremities) + + # 應用權重調整,確保總和為1 + final_weights = base_weights.copy() + for key, adjustment in weight_adjustments.items(): + if key in final_weights: + final_weights[key] *= adjustment + + # 正規化權重 + total_weight = sum(final_weights.values()) + normalized_weights = {k: v/total_weight for k, v in final_weights.items()} + + return normalized_weights + + def calculate_base_score(scores: dict, weights: dict) -> float: + """ + 計算基礎評分分數,採用更靈活的評分機制。 + + 這個函數使用了改進後的評分邏輯,主要關注: + 1. 降低關鍵指標的最低門檻,使系統更包容 + 2. 引入非線性評分曲線,讓分數分布更合理 + 3. 優化多重條件失敗的處理方式 + 4. 
加強對品種特性的考慮 + + Parameters: + scores: 包含各項評分的字典 + weights: 包含各項權重的字典 + + Returns: + float: 0.2到1.0之間的基礎分數 + """ + # 重新定義關鍵指標閾值,提供更寬容的評分標準 + critical_thresholds = { + 'space': 0.35, + 'exercise': 0.35, + 'experience': 0.5, + 'noise': 0.5 + } + + # 評估關鍵指標失敗情況 + def evaluate_critical_failures() -> list: + """ + 評估關鍵指標的失敗情況,但採用更寬容的標準。 + 根據品種特性動態調整失敗判定。 + """ + failures = [] + temperament = breed_info.get('Temperament', '').lower() + + for metric, threshold in critical_thresholds.items(): + if scores[metric] < threshold: + # 特殊情況處理:適應性強的品種可以有更低的空間要求 + if metric == 'space' and any(trait in temperament + for trait in ['adaptable', 'calm', 'apartment']): + if scores[metric] >= threshold - 0.1: + continue + + # 運動需求的特殊處理 + elif metric == 'exercise': + exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() + if exercise_needs == 'LOW' and scores[metric] >= threshold - 0.1: + continue + + failures.append((metric, scores[metric])) + + return failures + + # 計算基礎分數 + def calculate_weighted_score() -> float: + """ + 計算加權分數,使用非線性函數使分數分布更合理。 + """ + weighted_scores = [] + for key, score in scores.items(): + if key in weights: + # 使用sigmoid函數使分數曲線更平滑 + adjusted_score = 1 / (1 + math.exp(-10 * (score - 0.5))) + weighted_scores.append(adjusted_score * weights[key]) + + return sum(weighted_scores) + + # 處理臨界失敗情況 + critical_failures = evaluate_critical_failures() + base_score = calculate_weighted_score() + + if critical_failures: + # 分離空間和運動相關的懲罰 + space_exercise_penalty = 0 + other_penalty = 0 + + for metric, score in critical_failures: + if metric in ['space', 'exercise']: + # 降低空間和運動失敗的懲罰程度 + penalty = (critical_thresholds[metric] - score) * 0.08 + space_exercise_penalty += penalty + else: + # 其他失敗的懲罰保持較高 + penalty = (critical_thresholds[metric] - score) * 0.20 + other_penalty += penalty + + # 計算總懲罰,但使用更溫和的方式 + total_penalty = (space_exercise_penalty + other_penalty) / 2 + base_score *= (1 - total_penalty) + + # 多重失敗的處理更寬容 + if len(critical_failures) > 1: + # 
從0.98提升到0.99,降低多重失敗的疊加懲罰 + base_score *= (0.99 ** (len(critical_failures) - 1)) + + # 品種特性加分 + def apply_breed_bonus() -> float: + """ + 根據品種特性提供額外加分, + 特別是對於在特定環境下表現良好的品種。 + """ + bonus = 0 + temperament = breed_info.get('Temperament', '').lower() + description = breed_info.get('Description', '').lower() + + # 適應性加分 + adaptability_traits = ['adaptable', 'versatile', 'easy-going'] + if any(trait in temperament for trait in adaptability_traits): + bonus += 0.05 + + # 公寓適應性加分 + if user_prefs.living_space == 'apartment': + apartment_traits = ['calm', 'quiet', 'good for apartments'] + if any(trait in temperament or trait in description for trait in apartment_traits): + bonus += 0.05 + + return min(0.1, bonus) # 限制最大加分 + + # 應用品種特性加分 + breed_bonus = apply_breed_bonus() + base_score = min(1.0, base_score * (1 + breed_bonus)) + + # 確保最終分數在合理範圍內 + return max(0.2, min(1.0, base_score)) + + def evaluate_condition_interactions(scores: dict) -> float: + """評估不同條件間的相互影響,更寬容地處理極端組合""" + interaction_penalty = 1.0 + + # 只保留最基本的經驗相關評估 + if user_prefs.experience_level == 'beginner': + if breed_info.get('Care Level') == 'HIGH': + interaction_penalty *= 0.95 + + # 運動時間與類型的基本互動也降低懲罰程度 + exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() + if exercise_needs == 'VERY HIGH' and user_prefs.exercise_type == 'light_walks': + interaction_penalty *= 0.95 + + return interaction_penalty + + def calculate_adjusted_perfect_bonus(perfect_conditions: dict) -> float: + """計算完美匹配獎勵,但更注重條件的整體表現""" + bonus = 1.0 + + # 降低單項獎勵的影響力 + bonus += 0.06 * perfect_conditions['size_match'] + bonus += 0.06 * perfect_conditions['exercise_match'] + bonus += 0.06 * perfect_conditions['experience_match'] + bonus += 0.03 * perfect_conditions['living_condition_match'] + + # 如果有任何條件表現不佳,降低整體獎勵 + low_scores = [score for score in perfect_conditions.values() if score < 0.6] + if low_scores: + bonus *= (0.85 ** len(low_scores)) + + # 確保獎勵不會過高 + return min(1.25, bonus) + + def 
apply_breed_specific_adjustments(score: float) -> float: + """根據品種特性進行最終調整""" + # 檢查是否存在極端不匹配的情況 + exercise_mismatch = False + size_mismatch = False + experience_mismatch = False + + # 運動需求極端不匹配 + if breed_info.get('Exercise Needs', 'MODERATE').upper() == 'VERY HIGH': + if user_prefs.exercise_time < 90 or user_prefs.exercise_type == 'light_walks': + exercise_mismatch = True + + # 體型與空間極端不匹配 + if user_prefs.living_space == 'apartment' and breed_info['Size'] in ['Large', 'Giant']: + size_mismatch = True + + # 經驗需求極端不匹配 + if user_prefs.experience_level == 'beginner' and breed_info.get('Care Level') == 'HIGH': + experience_mismatch = True + + # 根據不匹配的數量進行懲罰 + mismatch_count = sum([exercise_mismatch, size_mismatch, experience_mismatch]) + if mismatch_count > 0: + score *= (0.8 ** mismatch_count) + + return score + + # 計算動態權重 + weights = calculate_weights() + + # 正規化權重 + total_weight = sum(weights.values()) + normalized_weights = {k: v/total_weight for k, v in weights.items()} + + # 計算基礎分數 + base_score = calculate_base_score(scores, normalized_weights) + + # 評估條件互動 + interaction_multiplier = evaluate_condition_interactions(scores) + + # 計算完美匹配獎勵 + perfect_conditions = evaluate_perfect_conditions() + perfect_bonus = calculate_adjusted_perfect_bonus(perfect_conditions) + + # 計算初步分數 + preliminary_score = base_score * interaction_multiplier * perfect_bonus + + # 應用品種特定調整 + final_score = apply_breed_specific_adjustments(preliminary_score) + + # 確保分數在合理範圍內,並降低最高可能分數 + max_possible_score = 0.96 # 降低最高可能分數 + min_possible_score = 0.3 + + return min(max_possible_score, max(min_possible_score, final_score)) + + def calculate_environmental_fit(self, breed_info: dict, user_prefs: UserPreferences) -> float: + """ + 計算品種與環境的適應性加成 + + Args: + breed_info: 品種資訊 + user_prefs: 使用者偏好 + + Returns: + float: 環境適應性加成分數 + """ + adaptability_score = 0.0 + description = breed_info.get('Description', '').lower() + temperament = breed_info.get('Temperament', '').lower() + + # 環境適應性評估 + if 
user_prefs.living_space == 'apartment': + if 'adaptable' in temperament or 'apartment' in description: + adaptability_score += 0.1 + if breed_info.get('Size') == 'Small': + adaptability_score += 0.05 + elif user_prefs.living_space == 'house_large': + if 'active' in temperament or 'energetic' in description: + adaptability_score += 0.1 + + # 氣候適應性 + if user_prefs.climate in description or user_prefs.climate in temperament: + adaptability_score += 0.05 + + return min(0.2, adaptability_score) diff --git a/scoring_calculation_system.py b/scoring_calculation_system.py index 25e8a1d30654c1b1b320007d04d721ce3ec00f93..bf97193e59d6ba81ffe4f1b6bf3cb697b6b27fe9 100644 --- a/scoring_calculation_system.py +++ b/scoring_calculation_system.py @@ -1,438 +1,72 @@ from dataclasses import dataclass +from typing import Dict, List, Any, Optional +import math +import random +import numpy as np +import traceback from breed_health_info import breed_health_info from breed_noise_info import breed_noise_info -import traceback -import math +from dog_database import get_dog_description +from dimension_score_calculator import DimensionScoreCalculator +from score_integration_manager import ScoreIntegrationManager, UserPreferences +from bonus_penalty_engine import BonusPenaltyEngine + +@dataclass +class DimensionalScore: + """維度分數結構""" + dimension_name: str + raw_score: float # 原始計算分數 (0.0-1.0) + weight: float # 維度權重 (0.0-1.0) + display_score: float # 顯示分數 (0.0-1.0) + explanation: str # 評分說明 + @dataclass -class UserPreferences: - - """使用者偏好設定的資料結構""" - living_space: str # "apartment", "house_small", "house_large" - yard_access: str # "no_yard", "shared_yard", "private_yard" - exercise_time: int # minutes per day - exercise_type: str # "light_walks", "moderate_activity", "active_training" - grooming_commitment: str # "low", "medium", "high" - experience_level: str # "beginner", "intermediate", "advanced" - time_availability: str # "limited", "moderate", "flexible" - has_children: bool - 
children_age: str # "toddler", "school_age", "teenager" - noise_tolerance: str # "low", "medium", "high" - space_for_play: bool - other_pets: bool - climate: str # "cold", "moderate", "hot" - health_sensitivity: str = "medium" - barking_acceptance: str = None - size_preference: str = "no_preference" # "no_preference", "small", "medium", "large", "giant" - training_commitment: str = "medium" # "low", "medium", "high" - 訓練投入程度 - living_environment: str = "ground_floor" # "ground_floor", "with_elevator", "walk_up" - 居住環境細節 - - def __post_init__(self): - if self.barking_acceptance is None: - self.barking_acceptance = self.noise_tolerance +class UnifiedBreedScore: + """統一品種評分結果""" + breed_name: str + overall_score: float # 總體分數 (0.0-1.0) + dimensional_scores: List[DimensionalScore] # 各維度分數 + bonus_factors: Dict[str, float] # 加分因素 + penalty_factors: Dict[str, float] # 扣分因素 + confidence_level: float # 推薦信心度 (0.0-1.0) + match_explanation: str # 匹配說明 + warnings: List[str] # 警告訊息 + + +# 初始化計算器實例 +_dimension_calculator = DimensionScoreCalculator() +_score_manager = ScoreIntegrationManager() +_bonus_engine = BonusPenaltyEngine() + def apply_size_filter(breed_score: float, user_preference: str, breed_size: str) -> float: """ - 基於用戶的體型偏好過濾品種,只要不符合就過濾掉 - + 強過濾機制,基於用戶的體型偏好過濾品種 + Parameters: breed_score (float): 原始品種評分 user_preference (str): 用戶偏好的體型 breed_size (str): 品種的實際體型 - + Returns: float: 過濾後的評分,如果體型不符合會返回 0 """ - if user_preference == "no_preference": - return breed_score - - # 標準化 size 字串以進行比較 - breed_size = breed_size.lower().strip() - user_preference = user_preference.lower().strip() - - # 特殊處理 "varies" 的情況 - if breed_size == "varies": - return breed_score * 0.5 # 給予一個折扣係數,因為不確定性 - - # 如果用戶有明確體型偏好但品種不符合,返回 0 - if user_preference != breed_size: - return 0 - - return breed_score + return _score_manager.apply_size_filter(breed_score, user_preference, breed_size) @staticmethod def calculate_breed_bonus(breed_info: dict, user_prefs: 'UserPreferences') -> float: """計算品種額外加分""" 
- bonus = 0.0 - temperament = breed_info.get('Temperament', '').lower() - - # 1. 壽命加分(最高0.05) - try: - lifespan = breed_info.get('Lifespan', '10-12 years') - years = [int(x) for x in lifespan.split('-')[0].split()[0:1]] - longevity_bonus = min(0.05, (max(years) - 10) * 0.01) - bonus += longevity_bonus - except: - pass - - # 2. 性格特徵加分(最高0.15) - positive_traits = { - 'friendly': 0.05, - 'gentle': 0.05, - 'patient': 0.05, - 'intelligent': 0.04, - 'adaptable': 0.04, - 'affectionate': 0.04, - 'easy-going': 0.03, - 'calm': 0.03 - } - - negative_traits = { - 'aggressive': -0.08, - 'stubborn': -0.06, - 'dominant': -0.06, - 'aloof': -0.04, - 'nervous': -0.05, - 'protective': -0.04 - } - - personality_score = sum(value for trait, value in positive_traits.items() if trait in temperament) - personality_score += sum(value for trait, value in negative_traits.items() if trait in temperament) - bonus += max(-0.15, min(0.15, personality_score)) - - # 3. 適應性加分(最高0.1) - adaptability_bonus = 0.0 - if breed_info.get('Size') == "Small" and user_prefs.living_space == "apartment": - adaptability_bonus += 0.05 - if 'adaptable' in temperament or 'versatile' in temperament: - adaptability_bonus += 0.05 - bonus += min(0.1, adaptability_bonus) - - # 4. 
家庭相容性(最高0.1) - if user_prefs.has_children: - family_traits = { - 'good with children': 0.06, - 'patient': 0.05, - 'gentle': 0.05, - 'tolerant': 0.04, - 'playful': 0.03 - } - unfriendly_traits = { - 'aggressive': -0.08, - 'nervous': -0.07, - 'protective': -0.06, - 'territorial': -0.05 - } - - # 年齡評估 - age_adjustments = { - 'toddler': {'bonus_mult': 0.7, 'penalty_mult': 1.3}, - 'school_age': {'bonus_mult': 1.0, 'penalty_mult': 1.0}, - 'teenager': {'bonus_mult': 1.2, 'penalty_mult': 0.8} - } - - adj = age_adjustments.get(user_prefs.children_age, - {'bonus_mult': 1.0, 'penalty_mult': 1.0}) - - family_bonus = sum(value for trait, value in family_traits.items() - if trait in temperament) * adj['bonus_mult'] - family_penalty = sum(value for trait, value in unfriendly_traits.items() - if trait in temperament) * adj['penalty_mult'] - - bonus += min(0.15, max(-0.2, family_bonus + family_penalty)) - - - # 5. 專門技能加分(最高0.1) - skill_bonus = 0.0 - special_abilities = { - 'working': 0.03, - 'herding': 0.03, - 'hunting': 0.03, - 'tracking': 0.03, - 'agility': 0.02 - } - for ability, value in special_abilities.items(): - if ability in temperament.lower(): - skill_bonus += value - bonus += min(0.1, skill_bonus) - - - # 6. 
適應性評估 - adaptability_bonus = 0.0 - if breed_info.get('Size') == "Small" and user_prefs.living_space == "apartment": - adaptability_bonus += 0.08 - - # 環境適應性評估 - if 'adaptable' in temperament or 'versatile' in temperament: - if user_prefs.living_space == "apartment": - adaptability_bonus += 0.10 - else: - adaptability_bonus += 0.05 - - # 氣候適應性 - description = breed_info.get('Description', '').lower() - climate = user_prefs.climate - if climate == 'hot': - if 'heat tolerant' in description or 'warm climate' in description: - adaptability_bonus += 0.08 - elif 'thick coat' in description or 'cold climate' in description: - adaptability_bonus -= 0.10 - elif climate == 'cold': - if 'thick coat' in description or 'cold climate' in description: - adaptability_bonus += 0.08 - elif 'heat tolerant' in description or 'short coat' in description: - adaptability_bonus -= 0.10 - - bonus += min(0.15, adaptability_bonus) - - return min(0.5, max(-0.25, bonus)) - + return BonusPenaltyEngine.calculate_breed_bonus(breed_info, user_prefs) + @staticmethod def calculate_additional_factors(breed_info: dict, user_prefs: 'UserPreferences') -> dict: """ 計算額外的評估因素,結合品種特性與使用者需求的全面評估系統 - - 1. 多功能性評估 - 品種的多樣化能力 - 2. 訓練性評估 - 學習和服從能力 - 3. 能量水平評估 - 活力和運動需求 - 4. 美容需求評估 - 護理和維護需求 - 5. 社交需求評估 - 與人互動的需求程度 - 6. 氣候適應性 - 對環境的適應能力 - 7. 運動類型匹配 - 與使用者運動習慣的契合度 - 8. 生活方式適配 - 與使用者日常生活的匹配度 """ - factors = { - 'versatility': 0.0, # 多功能性 - 'trainability': 0.0, # 可訓練度 - 'energy_level': 0.0, # 能量水平 - 'grooming_needs': 0.0, # 美容需求 - 'social_needs': 0.0, # 社交需求 - 'weather_adaptability': 0.0,# 氣候適應性 - 'exercise_match': 0.0, # 運動匹配度 - 'lifestyle_fit': 0.0 # 生活方式適配度 - } - - temperament = breed_info.get('Temperament', '').lower() - description = breed_info.get('Description', '').lower() - size = breed_info.get('Size', 'Medium') - - # 1. 
多功能性評估 - 加強品種用途評估 - versatile_traits = { - 'intelligent': 0.25, - 'adaptable': 0.25, - 'trainable': 0.20, - 'athletic': 0.15, - 'versatile': 0.15 - } - - working_roles = { - 'working': 0.20, - 'herding': 0.15, - 'hunting': 0.15, - 'sporting': 0.15, - 'companion': 0.10 - } - - # 計算特質分數 - trait_score = sum(value for trait, value in versatile_traits.items() - if trait in temperament) - - # 計算角色分數 - role_score = sum(value for role, value in working_roles.items() - if role in description) - - # 根據使用者需求調整多功能性評分 - purpose_traits = { - 'light_walks': ['calm', 'gentle', 'easy-going'], - 'moderate_activity': ['adaptable', 'balanced', 'versatile'], - 'active_training': ['intelligent', 'trainable', 'working'] - } - - if user_prefs.exercise_type in purpose_traits: - matching_traits = sum(1 for trait in purpose_traits[user_prefs.exercise_type] - if trait in temperament) - trait_score += matching_traits * 0.15 - - factors['versatility'] = min(1.0, trait_score + role_score) - - # 2. 訓練性評估 - trainable_traits = { - 'intelligent': 0.3, - 'eager to please': 0.3, - 'trainable': 0.2, - 'quick learner': 0.2, - 'obedient': 0.2 - } - - base_trainability = sum(value for trait, value in trainable_traits.items() - if trait in temperament) - - # 根據使用者經驗調整訓練性評分 - experience_multipliers = { - 'beginner': 1.2, # 新手更需要容易訓練的狗 - 'intermediate': 1.0, - 'advanced': 0.8 # 專家能處理較難訓練的狗 - } - - factors['trainability'] = min(1.0, base_trainability * - experience_multipliers.get(user_prefs.experience_level, 1.0)) - - # 3. 
能量水平評估 - exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() - energy_levels = { - 'VERY HIGH': { - 'score': 1.0, - 'min_exercise': 120, - 'ideal_exercise': 150 - }, - 'HIGH': { - 'score': 0.8, - 'min_exercise': 90, - 'ideal_exercise': 120 - }, - 'MODERATE': { - 'score': 0.6, - 'min_exercise': 60, - 'ideal_exercise': 90 - }, - 'LOW': { - 'score': 0.4, - 'min_exercise': 30, - 'ideal_exercise': 60 - } - } - - breed_energy = energy_levels.get(exercise_needs, energy_levels['MODERATE']) - - # 計算運動時間匹配度 - if user_prefs.exercise_time >= breed_energy['ideal_exercise']: - energy_score = breed_energy['score'] - else: - # 如果運動時間不足,按比例降低分數 - deficit_ratio = max(0.4, user_prefs.exercise_time / breed_energy['ideal_exercise']) - energy_score = breed_energy['score'] * deficit_ratio - - factors['energy_level'] = energy_score - - # 4. 美容需求評估 - grooming_needs = breed_info.get('Grooming Needs', 'MODERATE').upper() - grooming_levels = { - 'HIGH': 1.0, - 'MODERATE': 0.6, - 'LOW': 0.3 - } - - # 特殊毛髮類型評估 - coat_adjustments = 0 - if 'long coat' in description: - coat_adjustments += 0.2 - if 'double coat' in description: - coat_adjustments += 0.15 - if 'curly' in description: - coat_adjustments += 0.15 - - # 根據使用者承諾度調整 - commitment_multipliers = { - 'low': 1.5, # 低承諾度時加重美容需求的影響 - 'medium': 1.0, - 'high': 0.8 # 高承諾度時降低美容需求的影響 - } - - base_grooming = grooming_levels.get(grooming_needs, 0.6) + coat_adjustments - factors['grooming_needs'] = min(1.0, base_grooming * - commitment_multipliers.get(user_prefs.grooming_commitment, 1.0)) - - # 5. 
社交需求評估 - social_traits = { - 'friendly': 0.25, - 'social': 0.25, - 'affectionate': 0.20, - 'people-oriented': 0.20 - } - - antisocial_traits = { - 'independent': -0.20, - 'aloof': -0.20, - 'reserved': -0.15 - } - - social_score = sum(value for trait, value in social_traits.items() - if trait in temperament) - antisocial_score = sum(value for trait, value in antisocial_traits.items() - if trait in temperament) - - # 家庭情況調整 - if user_prefs.has_children: - child_friendly_bonus = 0.2 if 'good with children' in temperament else 0 - social_score += child_friendly_bonus - - factors['social_needs'] = min(1.0, max(0.0, social_score + antisocial_score)) - - # 6. 氣候適應性評估 - 更細緻的環境適應評估 - climate_traits = { - 'cold': { - 'positive': ['thick coat', 'winter', 'cold climate'], - 'negative': ['short coat', 'heat sensitive'] - }, - 'hot': { - 'positive': ['short coat', 'heat tolerant', 'warm climate'], - 'negative': ['thick coat', 'cold climate'] - }, - 'moderate': { - 'positive': ['adaptable', 'all climate'], - 'negative': [] - } - } - - climate_score = 0.4 # 基礎分數 - if user_prefs.climate in climate_traits: - # 正面特質加分 - climate_score += sum(0.2 for term in climate_traits[user_prefs.climate]['positive'] - if term in description) - # 負面特質減分 - climate_score -= sum(0.2 for term in climate_traits[user_prefs.climate]['negative'] - if term in description) - - factors['weather_adaptability'] = min(1.0, max(0.0, climate_score)) - - # 7. 運動類型匹配評估 - exercise_type_traits = { - 'light_walks': ['calm', 'gentle'], - 'moderate_activity': ['adaptable', 'balanced'], - 'active_training': ['athletic', 'energetic'] - } - - if user_prefs.exercise_type in exercise_type_traits: - match_score = sum(0.25 for trait in exercise_type_traits[user_prefs.exercise_type] - if trait in temperament) - factors['exercise_match'] = min(1.0, match_score + 0.5) # 基礎分0.5 - - # 8. 
生活方式適配評估 - lifestyle_score = 0.5 # 基礎分數 - - # 空間適配 - if user_prefs.living_space == 'apartment': - if size == 'Small': - lifestyle_score += 0.2 - elif size == 'Large': - lifestyle_score -= 0.2 - elif user_prefs.living_space == 'house_large': - if size in ['Large', 'Giant']: - lifestyle_score += 0.2 - - # 時間可用性適配 - time_availability_bonus = { - 'limited': -0.1, - 'moderate': 0, - 'flexible': 0.1 - } - lifestyle_score += time_availability_bonus.get(user_prefs.time_availability, 0) - - factors['lifestyle_fit'] = min(1.0, max(0.0, lifestyle_score)) - - return factors + return BonusPenaltyEngine.calculate_additional_factors(breed_info, user_prefs) def calculate_compatibility_score(breed_info: dict, user_prefs: UserPreferences) -> dict: @@ -440,7 +74,7 @@ def calculate_compatibility_score(breed_info: dict, user_prefs: UserPreferences) try: print(f"Processing breed: {breed_info.get('Breed', 'Unknown')}") print(f"Breed info keys: {breed_info.keys()}") - + if 'Size' not in breed_info: print("Missing Size information") raise KeyError("Size information missing") @@ -458,845 +92,70 @@ def calculate_compatibility_score(breed_info: dict, user_prefs: UserPreferences) 'adaptability_bonus': 0 } - def calculate_space_score(size: str, living_space: str, has_yard: bool, exercise_needs: str) -> float: - """ - 1. 動態的基礎分數矩陣 - 2. 強化空間品質評估 - 3. 增加極端情況處理 - 4. 
考慮不同空間組合的協同效應 - """ - def get_base_score(): - # 基礎分數矩陣 - 更極端的分數分配 - base_matrix = { - "Small": { - "apartment": { - "no_yard": 0.85, # 小型犬在公寓仍然適合 - "shared_yard": 0.90, # 共享院子提供額外活動空間 - "private_yard": 0.95 # 私人院子最理想 - }, - "house_small": { - "no_yard": 0.80, - "shared_yard": 0.85, - "private_yard": 0.90 - }, - "house_large": { - "no_yard": 0.75, - "shared_yard": 0.80, - "private_yard": 0.85 - } - }, - "Medium": { - "apartment": { - "no_yard": 0.75, - "shared_yard": 0.85, - "private_yard": 0.90 - }, - "house_small": { - "no_yard": 0.80, - "shared_yard": 0.90, - "private_yard": 0.90 - }, - "house_large": { - "no_yard": 0.85, - "shared_yard": 0.90, - "private_yard": 0.95 - } - }, - "Large": { - "apartment": { - "no_yard": 0.70, - "shared_yard": 0.80, - "private_yard": 0.85 - }, - "house_small": { - "no_yard": 0.75, - "shared_yard": 0.85, - "private_yard": 0.90 - }, - "house_large": { - "no_yard": 0.85, - "shared_yard": 0.90, - "private_yard": 1.0 - } - }, - "Giant": { - "apartment": { - "no_yard": 0.65, - "shared_yard": 0.75, - "private_yard": 0.80 - }, - "house_small": { - "no_yard": 0.70, - "shared_yard": 0.80, - "private_yard": 0.85 - }, - "house_large": { - "no_yard": 0.80, - "shared_yard": 0.90, - "private_yard": 1.0 - } - } - } - - yard_type = "private_yard" if has_yard else "no_yard" - return base_matrix.get(size, base_matrix["Medium"])[living_space][yard_type] - - def calculate_exercise_adjustment(): - # 運動需求對空間評分的影響 - exercise_impact = { - "Very High": { - "apartment": -0.10, - "house_small": -0.05, - "house_large": 0 - }, - "High": { - "apartment": -0.08, - "house_small": -0.05, - "house_large": 0 - }, - "Moderate": { - "apartment": -0.5, - "house_small": -0.02, - "house_large": 0 - }, - "Low": { - "apartment": 0.10, - "house_small": 0.05, - "house_large": 0 - } - } - - return exercise_impact.get(exercise_needs, exercise_impact["Moderate"])[living_space] - - def calculate_yard_bonus(): - # 院子效益評估更加細緻 - if not has_yard: - return 0 - - yard_benefits = { - 
"Giant": { - "Very High": 0.25, - "High": 0.20, - "Moderate": 0.15, - "Low": 0.10 - }, - "Large": { - "Very High": 0.20, - "High": 0.15, - "Moderate": 0.10, - "Low": 0.05 - }, - "Medium": { - "Very High": 0.15, - "High": 0.10, - "Moderate": 0.08, - "Low": 0.05 - }, - "Small": { - "Very High": 0.10, - "High": 0.08, - "Moderate": 0.05, - "Low": 0.03 - } - } - - size_benefits = yard_benefits.get(size, yard_benefits["Medium"]) - return size_benefits.get(exercise_needs, size_benefits["Moderate"]) - - def apply_extreme_case_adjustments(score): - # 處理極端情況 - if size == "Giant" and living_space == "apartment": - return score * 0.85 - - if size == "Large" and living_space == "apartment" and exercise_needs == "Very High": - return score * 0.85 - - if size == "Small" and living_space == "house_large" and exercise_needs == "Low": - return score * 0.9 # 低運動需求的小型犬在大房子可能過於寬敞 - - return score - - # 計算最終分數 - base_score = get_base_score() - exercise_adj = calculate_exercise_adjustment() - yard_bonus = calculate_yard_bonus() - - # 整合所有評分因素 - initial_score = base_score + exercise_adj + yard_bonus - - # 應用極端情況調整 - final_score = apply_extreme_case_adjustments(initial_score) - - # 確保分數在有效範圍內,但允許更極端的結果 - return max(0.05, min(1.0, final_score)) - - - def calculate_exercise_score(breed_needs: str, exercise_time: int, exercise_type: str, breed_size: str, living_space: str) -> float: - """ - 計算品種運動需求與使用者運動條件的匹配度 - 1. 不同品種的運動耐受度差異 - 2. 運動時間與類型的匹配度 - 3. 
極端運動量的嚴格限制 - - Parameters: - breed_needs: 品種的運動需求等級 - exercise_time: 使用者計劃的運動時間(分鐘) - exercise_type: 運動類型(輕度/中度/高度) - - Returns: - float: 0.1到1.0之間的匹配分數 - """ - # 定義每個運動需求等級的具體參數 - exercise_levels = { - 'VERY HIGH': { - 'min': 120, # 最低需求 - 'ideal': 150, # 理想運動量 - 'max': 180, # 最大建議量 - 'type_weights': { # 不同運動類型的權重 - 'active_training': 1.0, - 'moderate_activity': 0.6, - 'light_walks': 0.3 - } - }, - 'HIGH': { - 'min': 90, - 'ideal': 120, - 'max': 150, - 'type_weights': { - 'active_training': 0.9, - 'moderate_activity': 0.8, - 'light_walks': 0.4 - } - }, - 'MODERATE': { - 'min': 45, - 'ideal': 60, - 'max': 90, - 'type_weights': { - 'active_training': 0.7, - 'moderate_activity': 1.0, - 'light_walks': 0.8 - } - }, - 'LOW': { - 'min': 15, - 'ideal': 30, - 'max': 45, - 'type_weights': { - 'active_training': 0.5, - 'moderate_activity': 0.8, - 'light_walks': 1.0 - } - } - } - - # 獲取品種的運動參數 - breed_level = exercise_levels.get(breed_needs.upper(), exercise_levels['MODERATE']) - - # 計算時間匹配度 - def calculate_time_score(): - """計算運動時間的匹配度,特別處理過度運動的情況""" - if exercise_time < breed_level['min']: - # 運動不足的嚴格懲罰 - deficit_ratio = exercise_time / breed_level['min'] - return max(0.1, deficit_ratio * 0.4) - - elif exercise_time <= breed_level['ideal']: - # 理想範圍內的漸進提升 - progress = (exercise_time - breed_level['min']) / (breed_level['ideal'] - breed_level['min']) - return 0.6 + (progress * 0.4) - - elif exercise_time <= breed_level['max']: - # 理想到最大範圍的平緩下降 - excess_ratio = (exercise_time - breed_level['ideal']) / (breed_level['max'] - breed_level['ideal']) - return 1.0 - (excess_ratio * 0.2) - - else: - # 過度運動的顯著懲罰 - excess = (exercise_time - breed_level['max']) / breed_level['max'] - # 低運動需求品種的過度運動懲罰更嚴重 - penalty_factor = 1.5 if breed_needs.upper() == 'LOW' else 1.0 - return max(0.1, 0.8 - (excess * 0.5 * penalty_factor)) - - # 計算運動類型匹配度 - def calculate_type_score(): - """評估運動類型的適合度,考慮品種特性""" - base_type_score = breed_level['type_weights'].get(exercise_type, 0.5) - - # 特殊情況處理 - if 
breed_needs.upper() == 'LOW' and exercise_type == 'active_training': - # 低運動需求品種不適合高強度運動 - base_type_score *= 0.5 - elif breed_needs.upper() == 'VERY HIGH' and exercise_type == 'light_walks': - # 高運動需求品種需要更多強度 - base_type_score *= 0.6 - - return base_type_score - - # 計算最終分數 - time_score = calculate_time_score() - type_score = calculate_type_score() - - # 根據運動需求等級調整權重 - if breed_needs.upper() == 'LOW': - # 低運動需求品種更重視運動類型的合適性 - final_score = (time_score * 0.6) + (type_score * 0.4) - elif breed_needs.upper() == 'VERY HIGH': - # 高運動需求品種更重視運動時間的充足性 - final_score = (time_score * 0.7) + (type_score * 0.3) - else: - final_score = (time_score * 0.65) + (type_score * 0.35) - - if breed_info['Size'] in ['Large', 'Giant'] and user_prefs.living_space == 'apartment': - if exercise_time >= 120: - final_score = min(1.0, final_score * 1.2) - - # 極端情況的最終調整 - if breed_needs.upper() == 'LOW' and exercise_time > breed_level['max'] * 2: - # 低運動需求品種的過度運動顯著降分 - final_score *= 0.6 - elif breed_needs.upper() == 'VERY HIGH' and exercise_time < breed_level['min'] * 0.5: - # 高運動需求品種運動嚴重不足降分 - final_score *= 0.5 - - return max(0.1, min(1.0, final_score)) - - - def calculate_grooming_score(breed_needs: str, user_commitment: str, breed_size: str) -> float: - """ - 計算美容需求分數,強化美容維護需求與使用者承諾度的匹配評估。 - 這個函數特別注意品種大小對美容工作的影響,以及不同程度的美容需求對時間投入的要求。 - """ - # 重新設計基礎分數矩陣,讓美容需求的差異更加明顯 - base_scores = { - "High": { - "low": 0.20, # 高需求對低承諾極不合適,顯著降低初始分數 - "medium": 0.65, # 中等承諾仍有挑戰 - "high": 1.0 # 高承諾最適合 - }, - "Moderate": { - "low": 0.45, # 中等需求對低承諾有困難 - "medium": 0.85, # 較好的匹配 - "high": 0.95 # 高承諾會有餘力 - }, - "Low": { - "low": 0.90, # 低需求對低承諾很合適 - "medium": 0.85, # 略微降低以反映可能過度投入 - "high": 0.80 # 可能造成資源浪費 - } - } - - # 取得基礎分數 - base_score = base_scores.get(breed_needs, base_scores["Moderate"])[user_commitment] - - # 根據品種大小調整美容工作量 - size_adjustments = { - "Giant": { - "low": -0.20, # 大型犬的美容工作量顯著增加 - "medium": -0.10, - "high": -0.05 - }, - "Large": { - "low": -0.15, - "medium": -0.05, - "high": 0 - }, - "Medium": 
{ - "low": -0.10, - "medium": -0.05, - "high": 0 - }, - "Small": { - "low": -0.05, - "medium": 0, - "high": 0 - } - } - - # 應用體型調整 - size_adjustment = size_adjustments.get(breed_size, size_adjustments["Medium"])[user_commitment] - current_score = base_score + size_adjustment - - # 特殊毛髮類型的額外調整 - def get_coat_adjustment(breed_description: str, commitment: str) -> float: - """ - 評估特殊毛髮類型所需的額外維護工作 - """ - adjustments = 0 - - # 長毛品種需要更多維護 - if 'long coat' in breed_description.lower(): - coat_penalties = { - 'low': -0.20, - 'medium': -0.15, - 'high': -0.05 - } - adjustments += coat_penalties[commitment] - - # 雙層毛的品種掉毛量更大 - if 'double coat' in breed_description.lower(): - double_coat_penalties = { - 'low': -0.15, - 'medium': -0.10, - 'high': -0.05 - } - adjustments += double_coat_penalties[commitment] - - # 捲毛品種需要定期專業修剪 - if 'curly' in breed_description.lower(): - curly_penalties = { - 'low': -0.15, - 'medium': -0.10, - 'high': -0.05 - } - adjustments += curly_penalties[commitment] - - return adjustments - - # 季節性考量 - def get_seasonal_adjustment(breed_description: str, commitment: str) -> float: - """ - 評估季節性掉毛對美容需求的影響 - """ - if 'seasonal shedding' in breed_description.lower(): - seasonal_penalties = { - 'low': -0.15, - 'medium': -0.10, - 'high': -0.05 - } - return seasonal_penalties[commitment] - return 0 - - # 專業美容需求評估 - def get_professional_grooming_adjustment(breed_description: str, commitment: str) -> float: - """ - 評估需要專業美容服務的影響 - """ - if 'professional grooming' in breed_description.lower(): - grooming_penalties = { - 'low': -0.20, - 'medium': -0.15, - 'high': -0.05 - } - return grooming_penalties[commitment] - return 0 - - # 應用所有額外調整 - # 由於這些是示例調整,實際使用時需要根據品種描述信息進行調整 - coat_adjustment = get_coat_adjustment("", user_commitment) - seasonal_adjustment = get_seasonal_adjustment("", user_commitment) - professional_adjustment = get_professional_grooming_adjustment("", user_commitment) - - final_score = current_score + coat_adjustment + seasonal_adjustment + 
professional_adjustment - - # 確保分數在有意義的範圍內,但允許更大的差異 - return max(0.1, min(1.0, final_score)) - - - def calculate_experience_score(care_level: str, user_experience: str, temperament: str) -> float: - """ - 計算使用者經驗與品種需求的匹配分數,更平衡的經驗等級影響 - - 改進重點: - 1. 提高初學者的基礎分數 - 2. 縮小經驗等級間的差距 - 3. 保持適度的區分度 - """ - # 基礎分數矩陣 - base_scores = { - "High": { - "beginner": 0.55, # 提高起始分,讓新手也有機會 - "intermediate": 0.80, # 中等經驗用戶可能有不錯的勝任能力 - "advanced": 0.95 # 資深者幾乎完全勝任 - }, - "Moderate": { - "beginner": 0.65, # 適中難度對新手更友善 - "intermediate": 0.85, # 中等經驗用戶相當適合 - "advanced": 0.90 # 資深者完全勝任 - }, - "Low": { - "beginner": 0.85, # 新手友善品種維持高分 - "intermediate": 0.90, # 中等經驗用戶幾乎完全勝任 - "advanced": 0.90 # 資深者完全勝任 - } - } - - # 取得基礎分數 - score = base_scores.get(care_level, base_scores["Moderate"])[user_experience] - - # 性格評估的權重 - temperament_lower = temperament.lower() - temperament_adjustments = 0.0 - - # 根據經驗等級設定不同的特徵評估標準,降低懲罰程度 - if user_experience == "beginner": - difficult_traits = { - 'stubborn': -0.15, - 'independent': -0.12, - 'dominant': -0.12, - 'strong-willed': -0.10, - 'protective': -0.10, - 'aloof': -0.08, - 'energetic': -0.08, - 'aggressive': -0.20 - } - - easy_traits = { - 'gentle': 0.08, - 'friendly': 0.08, - 'eager to please': 0.10, - 'patient': 0.08, - 'adaptable': 0.08, - 'calm': 0.08 - } - - # 計算特徵調整 - for trait, penalty in difficult_traits.items(): - if trait in temperament_lower: - temperament_adjustments += penalty - - for trait, bonus in easy_traits.items(): - if trait in temperament_lower: - temperament_adjustments += bonus - - # 品種類型特殊評估,降低懲罰程度 - if 'terrier' in temperament_lower: - temperament_adjustments -= 0.10 # 降低懲罰 - elif 'working' in temperament_lower: - temperament_adjustments -= 0.12 - elif 'guard' in temperament_lower: - temperament_adjustments -= 0.12 - - # 中等經驗用戶 - elif user_experience == "intermediate": - moderate_traits = { - 'stubborn': -0.08, - 'independent': -0.05, - 'intelligent': 0.10, - 'athletic': 0.08, - 'versatile': 0.08, - 'protective': -0.05 - } - - for 
trait, adjustment in moderate_traits.items(): - if trait in temperament_lower: - temperament_adjustments += adjustment - - else: # advanced - advanced_traits = { - 'stubborn': 0.05, - 'independent': 0.05, - 'intelligent': 0.10, - 'protective': 0.05, - 'strong-willed': 0.05 - } - - for trait, bonus in advanced_traits.items(): - if trait in temperament_lower: - temperament_adjustments += bonus - - # 確保最終分數範圍合理 - final_score = max(0.15, min(1.0, score + temperament_adjustments)) - - return final_score - - def calculate_health_score(breed_name: str, user_prefs: UserPreferences) -> float: - """ - 計算品種健康分數,加強健康問題的影響力和與使用者敏感度的連結 - - 1. 根據使用者的健康敏感度調整分數 - 2. 更嚴格的健康問題評估 - 3. 考慮多重健康問題的累積效應 - 4. 加入遺傳疾病的特別考量 - """ - if breed_name not in breed_health_info: - return 0.5 - - health_notes = breed_health_info[breed_name]['health_notes'].lower() - - # 嚴重健康問題 - 加重扣分 - severe_conditions = { - 'hip dysplasia': -0.20, # 髖關節發育不良,影響生活品質 - 'heart disease': -0.15, # 心臟疾病,需要長期治療 - 'progressive retinal atrophy': -0.15, # 進行性視網膜萎縮,導致失明 - 'bloat': -0.18, # 胃扭轉,致命風險 - 'epilepsy': -0.15, # 癲癇,需要長期藥物控制 - 'degenerative myelopathy': -0.15, # 脊髓退化,影響行動能力 - 'von willebrand disease': -0.12 # 血液凝固障礙 - } - - # 中度健康問題 - 適度扣分 - moderate_conditions = { - 'allergies': -0.12, # 過敏問題,需要持續關注 - 'eye problems': -0.15, # 眼睛問題,可能需要手術 - 'joint problems': -0.15, # 關節問題,影響運動能力 - 'hypothyroidism': -0.12, # 甲狀腺功能低下,需要藥物治療 - 'ear infections': -0.10, # 耳道感染,需要定期清理 - 'skin issues': -0.12 # 皮膚問題,需要特殊護理 - } - - # 輕微健康問題 - 輕微扣分 - minor_conditions = { - 'dental issues': -0.08, # 牙齒問題,需要定期護理 - 'weight gain tendency': -0.08, # 易胖體質,需要控制飲食 - 'minor allergies': -0.06, # 輕微過敏,可控制 - 'seasonal allergies': -0.06 # 季節性過敏 - } - - # 計算基礎健康分數 - health_score = 1.0 - - # 健康問題累積效應計算 - condition_counts = { - 'severe': 0, - 'moderate': 0, - 'minor': 0 - } - - # 計算各等級健康問題的數量和影響 - for condition, penalty in severe_conditions.items(): - if condition in health_notes: - health_score += penalty - condition_counts['severe'] += 1 - - for condition, 
penalty in moderate_conditions.items(): - if condition in health_notes: - health_score += penalty - condition_counts['moderate'] += 1 - - for condition, penalty in minor_conditions.items(): - if condition in health_notes: - health_score += penalty - condition_counts['minor'] += 1 - - # 多重問題的額外懲罰(累積效應) - if condition_counts['severe'] > 1: - health_score *= (0.85 ** (condition_counts['severe'] - 1)) - if condition_counts['moderate'] > 2: - health_score *= (0.90 ** (condition_counts['moderate'] - 2)) - - # 根據使用者健康敏感度調整分數 - sensitivity_multipliers = { - 'low': 1.1, # 較不在意健康問題 - 'medium': 1.0, # 標準評估 - 'high': 0.85 # 非常注重健康問題 - } - - health_score *= sensitivity_multipliers.get(user_prefs.health_sensitivity, 1.0) - - # 壽命影響評估 - try: - lifespan = breed_health_info[breed_name].get('average_lifespan', '10-12') - years = float(lifespan.split('-')[0]) - if years < 8: - health_score *= 0.85 # 短壽命顯著降低分數 - elif years < 10: - health_score *= 0.92 # 較短壽命輕微降低分數 - elif years > 13: - health_score *= 1.1 # 長壽命適度加分 - except: - pass - - # 特殊健康優勢 - if 'generally healthy' in health_notes or 'hardy breed' in health_notes: - health_score *= 1.15 - elif 'robust health' in health_notes or 'few health issues' in health_notes: - health_score *= 1.1 - - # 確保分數在合理範圍內,但允許更大的分數差異 - return max(0.1, min(1.0, health_score)) - - - def calculate_noise_score(breed_name: str, user_prefs: UserPreferences) -> float: - """ - 計算品種噪音分數,特別加強噪音程度與生活環境的關聯性評估,很多人棄養就是因為叫聲 - """ - if breed_name not in breed_noise_info: - return 0.5 - - noise_info = breed_noise_info[breed_name] - noise_level = noise_info['noise_level'].lower() - noise_notes = noise_info['noise_notes'].lower() - - # 重新設計基礎噪音分數矩陣,考慮不同情境下的接受度 - base_scores = { - 'low': { - 'low': 1.0, # 安靜的狗對低容忍完美匹配 - 'medium': 0.95, # 安靜的狗對一般容忍很好 - 'high': 0.90 # 安靜的狗對高容忍當然可以 - }, - 'medium': { - 'low': 0.60, # 一般吠叫對低容忍較困難 - 'medium': 0.90, # 一般吠叫對一般容忍可接受 - 'high': 0.95 # 一般吠叫對高容忍很好 - }, - 'high': { - 'low': 0.25, # 愛叫的狗對低容忍極不適合 - 'medium': 0.65, # 愛叫的狗對一般容忍有挑戰 - 
'high': 0.90 # 愛叫的狗對高容忍可以接受 - }, - 'varies': { - 'low': 0.50, # 不確定的情況對低容忍風險較大 - 'medium': 0.75, # 不確定的情況對一般容忍可嘗試 - 'high': 0.85 # 不確定的情況對高容忍問題較小 - } - } - - # 取得基礎分數 - base_score = base_scores.get(noise_level, {'low': 0.6, 'medium': 0.75, 'high': 0.85})[user_prefs.noise_tolerance] - - # 吠叫原因評估,根據環境調整懲罰程度 - barking_penalties = { - 'separation anxiety': { - 'apartment': -0.30, # 在公寓對鄰居影響更大 - 'house_small': -0.25, - 'house_large': -0.20 - }, - 'excessive barking': { - 'apartment': -0.25, - 'house_small': -0.20, - 'house_large': -0.15 - }, - 'territorial': { - 'apartment': -0.20, # 在公寓更容易被觸發 - 'house_small': -0.15, - 'house_large': -0.10 - }, - 'alert barking': { - 'apartment': -0.15, # 公寓環境刺激較多 - 'house_small': -0.10, - 'house_large': -0.08 - }, - 'attention seeking': { - 'apartment': -0.15, - 'house_small': -0.12, - 'house_large': -0.10 - } - } - - # 計算環境相關的吠叫懲罰 - living_space = user_prefs.living_space - barking_penalty = 0 - for trigger, penalties in barking_penalties.items(): - if trigger in noise_notes: - barking_penalty += penalties.get(living_space, -0.15) - - # 特殊情況評估 - special_adjustments = 0 - if user_prefs.has_children: - # 孩童年齡相關調整 - child_age_adjustments = { - 'toddler': { - 'high': -0.20, # 幼童對吵鬧更敏感 - 'medium': -0.15, - 'low': -0.05 - }, - 'school_age': { - 'high': -0.15, - 'medium': -0.10, - 'low': -0.05 - }, - 'teenager': { - 'high': -0.10, - 'medium': -0.05, - 'low': -0.02 - } - } - - # 根據孩童年齡和噪音等級調整 - age_adj = child_age_adjustments.get(user_prefs.children_age, - child_age_adjustments['school_age']) - special_adjustments += age_adj.get(noise_level, -0.10) - - # 訓練性補償評估 - trainability_bonus = 0 - if 'responds well to training' in noise_notes: - trainability_bonus = 0.12 - elif 'can be trained' in noise_notes: - trainability_bonus = 0.08 - elif 'difficult to train' in noise_notes: - trainability_bonus = 0.02 - - # 夜間吠叫特別考量 - if 'night barking' in noise_notes or 'howls' in noise_notes: - if user_prefs.living_space == 'apartment': - special_adjustments 
-= 0.15 - elif user_prefs.living_space == 'house_small': - special_adjustments -= 0.10 - else: - special_adjustments -= 0.05 - - # 計算最終分數,確保更大的分數範圍 - final_score = base_score + barking_penalty + special_adjustments + trainability_bonus - return max(0.1, min(1.0, final_score)) - - - # 1. 計算基礎分數 - print("\n=== 開始計算品種相容性分數 ===") - print(f"處理品種: {breed_info.get('Breed', 'Unknown')}") - print(f"品種信息: {breed_info}") - print(f"使用者偏好: {vars(user_prefs)}") - - # 計算所有基礎分數 + # 計算所有基礎分數並整合到字典中 scores = { - 'space': calculate_space_score( - breed_info['Size'], + 'space': _dimension_calculator.calculate_space_score( + breed_info['Size'], user_prefs.living_space, user_prefs.yard_access != 'no_yard', breed_info.get('Exercise Needs', 'Moderate') ), - 'exercise': calculate_exercise_score( + 'exercise': _dimension_calculator.calculate_exercise_score( breed_info.get('Exercise Needs', 'Moderate'), user_prefs.exercise_time, user_prefs.exercise_type, breed_info['Size'], - user_prefs.living_space + user_prefs.living_space, + breed_info ), - 'grooming': calculate_grooming_score( + 'grooming': _dimension_calculator.calculate_grooming_score( breed_info.get('Grooming Needs', 'Moderate'), user_prefs.grooming_commitment.lower(), breed_info['Size'] ), - 'experience': calculate_experience_score( + 'experience': _dimension_calculator.calculate_experience_score( breed_info.get('Care Level', 'Moderate'), user_prefs.experience_level, breed_info.get('Temperament', '') ), - 'health': calculate_health_score( + 'health': _dimension_calculator.calculate_health_score( breed_info.get('Breed', ''), - user_prefs + user_prefs.health_sensitivity ), - 'noise': calculate_noise_score( + 'noise': _dimension_calculator.calculate_noise_score( breed_info.get('Breed', ''), - user_prefs + user_prefs.noise_tolerance, + user_prefs.living_space, + user_prefs.has_children, + user_prefs.children_age ) } - final_score = calculate_breed_compatibility_score( + final_score = _score_manager.calculate_breed_compatibility_score( 
scores=scores, user_prefs=user_prefs, breed_info=breed_info ) # 計算環境適應性加成 - adaptability_bonus = calculate_environmental_fit(breed_info, user_prefs) - - if (breed_info.get('Exercise Needs') == "Very High" and - user_prefs.living_space == "apartment" and + adaptability_bonus = _score_manager.calculate_environmental_fit(breed_info, user_prefs) + + if (breed_info.get('Exercise Needs') == "Very High" and + user_prefs.living_space == "apartment" and user_prefs.exercise_time < 90): final_score *= 0.85 # 高運動需求但條件不足的懲罰 # 整合最終分數和加成 combined_score = (final_score * 0.9) + (adaptability_bonus * 0.1) - + # 體型過濾 filtered_score = apply_size_filter( breed_score=combined_score, user_preference=user_prefs.size_preference, breed_size=breed_info['Size'] ) - - final_score = amplify_score_extreme(filtered_score) + + final_score = _bonus_engine.amplify_score_extreme(filtered_score) # 更新並返回完整的評分結果 scores.update({ @@ -1308,1013 +167,331 @@ def calculate_compatibility_score(breed_info: dict, user_prefs: UserPreferences) return scores except Exception as e: - print(f"\n!!!!! 發生嚴重錯誤 !!!!!") - print(f"錯誤類型: {type(e).__name__}") - print(f"錯誤訊息: {str(e)}") - print(f"完整錯誤追蹤:") + print(f"\n!!!!! 
Critical Error Occurred !!!!!") + print(f"Error Type: {type(e).__name__}") + print(f"Error Message: {str(e)}") + print(f"Full Error Traceback:") print(traceback.format_exc()) return {k: 0.6 for k in ['space', 'exercise', 'grooming', 'experience', 'health', 'noise', 'overall']} def calculate_environmental_fit(breed_info: dict, user_prefs: UserPreferences) -> float: """計算品種與環境的適應性加成""" - adaptability_score = 0.0 - description = breed_info.get('Description', '').lower() - temperament = breed_info.get('Temperament', '').lower() - - # 環境適應性評估 - if user_prefs.living_space == 'apartment': - if 'adaptable' in temperament or 'apartment' in description: - adaptability_score += 0.1 - if breed_info.get('Size') == 'Small': - adaptability_score += 0.05 - elif user_prefs.living_space == 'house_large': - if 'active' in temperament or 'energetic' in description: - adaptability_score += 0.1 - - # 氣候適應性 - if user_prefs.climate in description or user_prefs.climate in temperament: - adaptability_score += 0.05 - - return min(0.2, adaptability_score) - + return _score_manager.calculate_environmental_fit(breed_info, user_prefs) + def calculate_breed_compatibility_score(scores: dict, user_prefs: UserPreferences, breed_info: dict) -> float: - """ - 1. 運動類型與時間的精確匹配 - 2. 進階使用者的專業需求 - 3. 空間利用的實際效果 - 4. 條件組合的嚴格評估 - """ - def evaluate_perfect_conditions(): - """ - 評估條件匹配度: - 1. 運動類型與時間的綜合評估 - 2. 專業技能需求評估 - 3. 品種特性評估 - """ - perfect_matches = { - 'size_match': 0, - 'exercise_match': 0, - 'experience_match': 0, - 'living_condition_match': 0, - 'breed_trait_match': 0 - } - - # 第一部分:運動需求評估 - def evaluate_exercise_compatibility(): - """ - 評估運動需求的匹配度: - 1. 時間與強度的合理搭配 - 2. 不同品種的運動特性 - 3. 
運動類型的適配性 - - 這個函數就像是一個體育教練,需要根據每個"運動員"(狗品種)的特點, - 為他們制定合適的訓練計劃。 - """ - exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() - exercise_time = user_prefs.exercise_time - exercise_type = user_prefs.exercise_type - temperament = breed_info.get('Temperament', '').lower() - description = breed_info.get('Description', '').lower() - - # 定義更精確的品種運動特性 - breed_exercise_patterns = { - 'sprint_type': { # 短跑型犬種,如 Whippet, Saluki - 'identifiers': ['fast', 'speed', 'sprint', 'racing', 'coursing', 'sight hound'], - 'ideal_exercise': { - 'active_training': 1.0, # 完美匹配高強度訓練 - 'moderate_activity': 0.5, # 持續運動不是最佳選擇 - 'light_walks': 0.3 # 輕度運動效果很差 - }, - 'time_ranges': { - 'ideal': (30, 60), # 最適合的運動時間範圍 - 'acceptable': (20, 90), # 可以接受的時間範圍 - 'penalty_start': 90 # 開始給予懲罰的時間點 - }, - 'penalty_rate': 0.8 # 超出範圍時的懲罰係數 - }, - 'endurance_type': { # 耐力型犬種,如 Border Collie - 'identifiers': ['herding', 'working', 'tireless', 'energetic', 'stamina', 'athletic'], - 'ideal_exercise': { - 'active_training': 0.9, # 高強度訓練很好 - 'moderate_activity': 1.0, # 持續運動是最佳選擇 - 'light_walks': 0.4 # 輕度運動不足 - }, - 'time_ranges': { - 'ideal': (90, 180), # 需要較長的運動時間 - 'acceptable': (60, 180), - 'penalty_start': 60 # 運動時間過短會受罰 - }, - 'penalty_rate': 0.7 - }, - 'moderate_type': { # 一般活動型犬種,如 Labrador - 'identifiers': ['friendly', 'playful', 'adaptable', 'versatile', 'companion'], - 'ideal_exercise': { - 'active_training': 0.8, - 'moderate_activity': 1.0, - 'light_walks': 0.6 - }, - 'time_ranges': { - 'ideal': (60, 120), - 'acceptable': (45, 150), - 'penalty_start': 150 - }, - 'penalty_rate': 0.6 - } - } - - def determine_breed_type(): - """改進品種運動類型的判斷,識別工作犬""" - # 優先檢查特殊運動類型的標識符 - for breed_type, pattern in breed_exercise_patterns.items(): - if any(identifier in temperament or identifier in description - for identifier in pattern['identifiers']): - return breed_type - - # 改進:根據運動需求和工作犬特徵進行更細緻的判斷 - if (exercise_needs in ['VERY HIGH', 'HIGH'] or - any(trait in temperament.lower() for trait in - 
['herding', 'working', 'intelligent', 'athletic', 'tireless'])): - if user_prefs.experience_level == 'advanced': - return 'endurance_type' # 優先判定為耐力型 - elif exercise_needs == 'LOW': - return 'moderate_type' - - return 'moderate_type' - - def calculate_time_match(pattern): - """ - 計算運動時間的匹配度。 - 這就像在判斷運動時間是否符合訓練計劃。 - """ - ideal_min, ideal_max = pattern['time_ranges']['ideal'] - accept_min, accept_max = pattern['time_ranges']['acceptable'] - penalty_start = pattern['time_ranges']['penalty_start'] - - # 在理想範圍內 - if ideal_min <= exercise_time <= ideal_max: - return 1.0 - - # 超出可接受範圍的嚴格懲罰 - elif exercise_time < accept_min: - deficit = accept_min - exercise_time - return max(0.2, 1 - (deficit / accept_min) * 1.2) - elif exercise_time > accept_max: - excess = exercise_time - penalty_start - penalty = min(0.8, (excess / penalty_start) * pattern['penalty_rate']) - return max(0.2, 1 - penalty) - - # 在可接受範圍但不在理想範圍 - else: - if exercise_time < ideal_min: - progress = (exercise_time - accept_min) / (ideal_min - accept_min) - return 0.6 + (0.4 * progress) - else: - remaining = (accept_max - exercise_time) / (accept_max - ideal_max) - return 0.6 + (0.4 * remaining) - - def apply_special_adjustments(time_score, type_score, breed_type, pattern): - """ - 處理特殊情況,確保運動方式真正符合品種需求。 - 1. 短跑型犬種的長時間運動懲罰 - 2. 耐力型犬種的獎勵機制 - 3. 
運動類型匹配的重要性 - """ - # 短跑型品種的特殊處理 - if breed_type == 'sprint_type': - if exercise_time > pattern['time_ranges']['penalty_start']: - # 加重長時間運動的懲罰 - penalty_factor = min(0.8, (exercise_time - pattern['time_ranges']['penalty_start']) / 60) - time_score *= max(0.3, 1 - penalty_factor) # 最低降到0.3 - # 運動類型不適合時的額外懲罰 - if exercise_type != 'active_training': - type_score *= 0.3 # 更嚴重的懲罰 - - # 耐力型品種的特殊處理 - elif breed_type == 'endurance_type': - if exercise_time < pattern['time_ranges']['penalty_start']: - time_score *= 0.5 # 維持運動不足的懲罰 - elif exercise_time >= 150: - if exercise_type in ['active_training', 'moderate_activity']: - time_bonus = min(0.3, (exercise_time - 150) / 150) - time_score = min(1.0, time_score * (1 + time_bonus)) - type_score = min(1.0, type_score * 1.2) - - # 運動強度不足的懲罰 - if exercise_type == 'light_walks': - if exercise_time > 90: - type_score *= 0.4 # 加重懲罰 - else: - type_score *= 0.5 - - return time_score, type_score - - # 執行評估流程 - breed_type = determine_breed_type() - pattern = breed_exercise_patterns[breed_type] - - # 計算基礎分數 - time_score = calculate_time_match(pattern) - type_score = pattern['ideal_exercise'].get(exercise_type, 0.5) - - # 應用特殊調整 - time_score, type_score = apply_special_adjustments(time_score, type_score, breed_type, pattern) - - # 根據品種類型決定最終權重 - if breed_type == 'sprint_type': - if exercise_time > pattern['time_ranges']['penalty_start']: - # 超時時更重視運動類型的匹配度 - return (time_score * 0.3) + (type_score * 0.7) - else: - return (time_score * 0.5) + (type_score * 0.5) - elif breed_type == 'endurance_type': - if exercise_time < pattern['time_ranges']['penalty_start']: - # 時間不足時更重視時間因素 - return (time_score * 0.7) + (type_score * 0.3) - else: - return (time_score * 0.6) + (type_score * 0.4) - else: - return (time_score * 0.5) + (type_score * 0.5) - - # 第二部分:專業技能需求評估 - def evaluate_expertise_requirements(): - care_level = breed_info.get('Care Level', 'MODERATE').upper() - temperament = breed_info.get('Temperament', '').lower() - - # 定義專業技能要求 - 
expertise_requirements = { - 'training_complexity': { - 'HIGH': {'beginner': 0.3, 'intermediate': 0.7, 'advanced': 1.0}, - 'MODERATE': {'beginner': 0.6, 'intermediate': 0.9, 'advanced': 1.0}, - 'LOW': {'beginner': 0.9, 'intermediate': 0.95, 'advanced': 0.9} - }, - 'special_traits': { - 'working': 0.2, # 工作犬需要額外技能 - 'herding': 0.2, # 牧羊犬需要特殊訓練 - 'intelligent': 0.15,# 高智商犬種需要心智刺激 - 'independent': 0.15,# 獨立性強的需要特殊處理 - 'protective': 0.1 # 護衛犬需要適當訓練 - } - } - - # 基礎分數 - base_score = expertise_requirements['training_complexity'][care_level][user_prefs.experience_level] - - # 特殊特徵評估 - trait_penalty = 0 - for trait, penalty in expertise_requirements['special_traits'].items(): - if trait in temperament: - if user_prefs.experience_level == 'beginner': - trait_penalty += penalty - elif user_prefs.experience_level == 'advanced': - trait_penalty -= penalty * 0.5 # 專家反而因應對特殊特徵而加分 - - return max(0.2, min(1.0, base_score - trait_penalty)) - - def evaluate_living_conditions() -> float: - """ - 評估生活環境適配性: - 1. 降低對大型犬的過度懲罰 - 2. 增加品種特性評估 - 3. 
提升對適應性的重視度 - """ - size = breed_info['Size'] - exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() - temperament = breed_info.get('Temperament', '').lower() - description = breed_info.get('Description', '').lower() - - # 重新定義空間需求矩陣,降低對大型犬的懲罰 - space_requirements = { - 'apartment': { - 'Small': 1.0, - 'Medium': 0.8, - 'Large': 0.7, - 'Giant': 0.6 - }, - 'house_small': { - 'Small': 0.9, - 'Medium': 1.0, - 'Large': 0.8, - 'Giant': 0.7 - }, - 'house_large': { - 'Small': 0.8, - 'Medium': 0.9, - 'Large': 1.0, - 'Giant': 1.0 - } - } - - # 基礎空間分數 - space_score = space_requirements.get( - user_prefs.living_space, - space_requirements['house_small'] - )[size] - - # 品種適應性評估 - adaptability_bonus = 0 - adaptable_traits = ['adaptable', 'calm', 'quiet', 'gentle', 'laid-back'] - challenging_traits = ['hyperactive', 'restless', 'requires space'] - - # 計算適應性加分 - if user_prefs.living_space == 'apartment': - for trait in adaptable_traits: - if trait in temperament or trait in description: - adaptability_bonus += 0.1 - - # 特別處理大型犬的適應性 - if size in ['Large', 'Giant']: - apartment_friendly_traits = ['calm', 'gentle', 'quiet'] - matched_traits = sum(1 for trait in apartment_friendly_traits - if trait in temperament or trait in description) - if matched_traits > 0: - adaptability_bonus += 0.15 * matched_traits - - # 活動空間需求調整,更寬容的評估 - if exercise_needs in ['HIGH', 'VERY HIGH']: - if user_prefs.living_space != 'house_large': - space_score *= 0.9 # 從0.8提升到0.9,降低懲罰 - - # 院子可用性評估,提供更合理的獎勵 - yard_scores = { - 'no_yard': 0.85, # 從0.7提升到0.85 - 'shared_yard': 0.92, # 從0.85提升到0.92 - 'private_yard': 1.0 - } - yard_multiplier = yard_scores.get(user_prefs.yard_access, 0.85) - - # 根據體型調整院子重要性 - if size in ['Large', 'Giant']: - yard_importance = 1.2 - elif size == 'Medium': - yard_importance = 1.1 - else: - yard_importance = 1.0 - - # 計算最終分數 - final_score = space_score * (1 + adaptability_bonus) - - # 應用院子影響 - if user_prefs.yard_access != 'no_yard': - yard_bonus = (yard_multiplier - 1) 
* yard_importance - final_score = min(1.0, final_score + yard_bonus) - - # 確保分數在合理範圍內,但提供更高的基礎分數 - return max(0.4, min(1.0, final_score)) - - # 第四部分:品種特性評估 - def evaluate_breed_traits(): - temperament = breed_info.get('Temperament', '').lower() - description = breed_info.get('Description', '').lower() - - trait_scores = [] - - # 評估性格特徵 - if user_prefs.has_children: - if 'good with children' in description: - trait_scores.append(1.0) - elif 'patient' in temperament or 'gentle' in temperament: - trait_scores.append(0.8) - else: - trait_scores.append(0.5) - - # 評估適應性 - adaptability_keywords = ['adaptable', 'versatile', 'flexible'] - if any(keyword in temperament for keyword in adaptability_keywords): - trait_scores.append(1.0) - else: - trait_scores.append(0.7) - - return sum(trait_scores) / len(trait_scores) if trait_scores else 0.7 - - # 計算各項匹配分數 - perfect_matches['exercise_match'] = evaluate_exercise_compatibility() - perfect_matches['experience_match'] = evaluate_expertise_requirements() - perfect_matches['living_condition_match'] = evaluate_living_conditions() - perfect_matches['size_match'] = evaluate_living_conditions() # 共用生活環境評估 - perfect_matches['breed_trait_match'] = evaluate_breed_traits() - - return perfect_matches - - def calculate_weights() -> dict: - """ - 動態計算評分權重: - 1. 極端情況的權重調整 - 2. 使用者條件的協同效應 - 3. 
品種特性的影響 - - Returns: - dict: 包含各評分項目權重的字典 - """ - # 定義基礎權重 - 提供更合理的起始分配 - base_weights = { - 'space': 0.25, # 提升空間權重,因為這是最基本的需求 - 'exercise': 0.25, # 運動需求同樣重要 - 'experience': 0.20, # 保持經驗的重要性 - 'grooming': 0.10, # 稍微降低美容需求的權重 - 'noise': 0.10, # 維持噪音評估的權重 - 'health': 0.10 # 維持健康評估的權重 - } - - def analyze_condition_extremity() -> dict: - """ - 評估使用者條件的極端程度,這影響權重的動態調整。 - 根據條件的極端程度返回相應的調整建議。 - """ - extremities = {} - - # 運動時間評估 - 更細緻的分級 - if user_prefs.exercise_time <= 30: - extremities['exercise'] = ('extremely_low', 0.8) - elif user_prefs.exercise_time <= 60: - extremities['exercise'] = ('low', 0.6) - elif user_prefs.exercise_time >= 180: - extremities['exercise'] = ('extremely_high', 0.8) - elif user_prefs.exercise_time >= 120: - extremities['exercise'] = ('high', 0.6) - else: - extremities['exercise'] = ('moderate', 0.3) - - # 空間限制評估 - 更合理的空間評估 - space_extremity = { - 'apartment': ('restricted', 0.7), - 'house_small': ('moderate', 0.5), - 'house_large': ('spacious', 0.3) - } - extremities['space'] = space_extremity.get(user_prefs.living_space, ('moderate', 0.5)) - - # 經驗水平評估 - 保持原有的評估邏輯 - experience_extremity = { - 'beginner': ('low', 0.7), - 'intermediate': ('moderate', 0.4), - 'advanced': ('high', 0.6) - } - extremities['experience'] = experience_extremity.get(user_prefs.experience_level, ('moderate', 0.5)) - - return extremities - - def calculate_weight_adjustments(extremities: dict) -> dict: - """ - 根據極端程度計算權重調整,特別注意條件組合的影響。 - """ - adjustments = {} - temperament = breed_info.get('Temperament', '').lower() - is_working_dog = any(trait in temperament - for trait in ['herding', 'working', 'intelligent', 'tireless']) - - # 空間權重調整 - if extremities['space'][0] == 'restricted': - if extremities['exercise'][0] in ['high', 'extremely_high']: - adjustments['space'] = 1.3 - adjustments['exercise'] = 2.3 - else: - adjustments['space'] = 1.6 - adjustments['noise'] = 1.5 - - # 運動需求權重調整 - if extremities['exercise'][0] in ['extremely_high', 'extremely_low']: - base_adjustment 
= 2.0 - if extremities['exercise'][0] == 'extremely_high': - if is_working_dog: - base_adjustment = 2.3 - adjustments['exercise'] = base_adjustment - - # 經驗需求權重調整 - if extremities['experience'][0] == 'low': - adjustments['experience'] = 1.8 - if breed_info.get('Care Level') == 'HIGH': - adjustments['experience'] = 2.0 - elif extremities['experience'][0] == 'high': - if is_working_dog: - adjustments['experience'] = 1.8 # 從2.5降低到1.8 - - # 特殊組合的處理 - def adjust_for_combinations(): - if (extremities['space'][0] == 'restricted' and - extremities['exercise'][0] in ['high', 'extremely_high']): - # 適度降低極端組合的影響 - adjustments['space'] = adjustments.get('space', 1.0) * 1.2 - adjustments['exercise'] = adjustments.get('exercise', 1.0) * 1.2 - - # 理想組合的獎勵 - if (extremities['experience'][0] == 'high' and - extremities['space'][0] == 'spacious' and - extremities['exercise'][0] in ['high', 'extremely_high'] and - is_working_dog): - adjustments['exercise'] = adjustments.get('exercise', 1.0) * 1.3 - adjustments['experience'] = adjustments.get('experience', 1.0) * 1.3 - - adjust_for_combinations() - return adjustments - - # 獲取條件極端度 - extremities = analyze_condition_extremity() - - # 計算權重調整 - weight_adjustments = calculate_weight_adjustments(extremities) - - # 應用權重調整,確保總和為1 - final_weights = base_weights.copy() - for key, adjustment in weight_adjustments.items(): - if key in final_weights: - final_weights[key] *= adjustment - - # 正規化權重 - total_weight = sum(final_weights.values()) - normalized_weights = {k: v/total_weight for k, v in final_weights.items()} - - return normalized_weights - - def calculate_weight_adjustments(extremities): - """ - 1. 高運動量時對耐力型犬種的偏好 - 2. 專家級別對工作犬種的偏好 - 3. 
條件組合的整體評估 - """ - adjustments = {} - temperament = breed_info.get('Temperament', '').lower() - is_working_dog = any(trait in temperament - for trait in ['herding', 'working', 'intelligent', 'tireless']) - - # 空間權重調整邏輯保持不變 - if extremities['space'][0] == 'highly_restricted': - if extremities['exercise'][0] in ['high', 'extremely_high']: - adjustments['space'] = 1.8 # 降低空間限制的權重 - adjustments['exercise'] = 2.5 # 提高運動能力的權重 - else: - adjustments['space'] = 2.5 - adjustments['noise'] = 2.0 - elif extremities['space'][0] == 'restricted': - adjustments['space'] = 1.8 - adjustments['noise'] = 1.5 - elif extremities['space'][0] == 'spacious': - adjustments['space'] = 0.8 - adjustments['exercise'] = 1.4 - - # 改進運動需求權重調整 - if extremities['exercise'][0] in ['high', 'extremely_high']: - # 提高運動量高時的基礎分數 - base_exercise_adjustment = 2.2 - if user_prefs.living_space == 'apartment': - base_exercise_adjustment = 2.5 # 特別獎勵公寓住戶的高運動量 - adjustments['exercise'] = base_exercise_adjustment - if extremities['exercise'][0] in ['extremely_low', 'extremely_high']: - base_adjustment = 2.5 - if extremities['exercise'][0] == 'extremely_high': - if is_working_dog: - base_adjustment = 3.0 # 工作犬在高運動量時獲得更高權重 - adjustments['exercise'] = base_adjustment - elif extremities['exercise'][0] in ['low', 'high']: - adjustments['exercise'] = 1.8 - - # 改進經驗需求權重調整 - if extremities['experience'][0] == 'low': - adjustments['experience'] = 2.2 - if breed_info.get('Care Level') == 'HIGH': - adjustments['experience'] = 2.5 - elif extremities['experience'][0] == 'high': - if is_working_dog: - adjustments['experience'] = 2.5 - if extremities['exercise'][0] in ['high', 'extremely_high']: - adjustments['experience'] = 2.8 - else: - adjustments['experience'] = 1.8 - - # 綜合條件影響 - def adjust_for_combinations(): - # 保持原有的基礎邏輯 - if (extremities['space'][0] == 'highly_restricted' and - extremities['exercise'][0] in ['high', 'extremely_high']): - adjustments['space'] = adjustments.get('space', 1.0) * 1.3 - 
adjustments['exercise'] = adjustments.get('exercise', 1.0) * 1.3 - - # 專家 + 大空間 + 高運動量 + 工作犬的組合 - if (extremities['experience'][0] == 'high' and - extremities['space'][0] == 'spacious' and - extremities['exercise'][0] in ['high', 'extremely_high'] and - is_working_dog): - adjustments['exercise'] = adjustments.get('exercise', 1.0) * 1.4 - adjustments['experience'] = adjustments.get('experience', 1.0) * 1.4 - - if extremities['space'][0] == 'spacious': - for key in ['grooming', 'health', 'noise']: - if key not in adjustments: - adjustments[key] = 1.2 - - def ensure_minimum_score(score): - if all([ - extremities['exercise'][0] in ['high', 'extremely_high'], - breed_matches_exercise_needs(), # 檢查品種是否適合該運動量 - score < 0.85 - ]): - return 0.85 - return score - - adjust_for_combinations() - return adjustments - - # 獲取條件極端度 - extremities = analyze_condition_extremity() - - # 計算權重調整 - weight_adjustments = calculate_weight_adjustments(extremities) - - # 應用權重調整 - final_weights = base_weights.copy() - for key, adjustment in weight_adjustments.items(): - if key in final_weights: - final_weights[key] *= adjustment - - return final_weights - - def apply_special_case_adjustments(score: float) -> float: - """ - 處理特殊情況和極端案例的評分調整: - 1. 條件組合的協同效應 - 2. 品種特性的獨特需求 - 3. 
極端情況的合理處理 - - Parameters: - score: 初始評分 - Returns: - float: 調整後的評分(0.2-1.0之間) - """ - severity_multiplier = 1.0 - - def evaluate_spatial_exercise_combination() -> float: - """ - 評估空間與運動需求的組合效應。 - - 這個函數不再過分懲罰大型犬,而是更多地考慮品種的實際特性。 - 就像評估一個運動員是否適合在特定場地訓練一樣,我們需要考慮 - 場地大小和運動需求的整體匹配度。 - """ - multiplier = 1.0 - - if user_prefs.living_space == 'apartment': - temperament = breed_info.get('Temperament', '').lower() - description = breed_info.get('Description', '').lower() - - # 檢查品種是否有利於公寓生活的特徵 - apartment_friendly = any(trait in temperament or trait in description - for trait in ['calm', 'adaptable', 'quiet']) - - # 大型犬的特殊處理 - if breed_info['Size'] in ['Large', 'Giant']: - if apartment_friendly: - multiplier *= 0.85 - else: - multiplier *= 0.75 - - # 檢查運動需求的匹配度 - exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() - exercise_time = user_prefs.exercise_time - - if exercise_needs in ['HIGH', 'VERY HIGH']: - if exercise_time >= 120: - multiplier *= 1.1 - - return multiplier - - def evaluate_experience_combination() -> float: - """ - 評估經驗需求的複合影響。 - - 這個函數就像是評估一個工作崗位與應聘者經驗的匹配度, - 需要綜合考慮工作難度和應聘者能力。 - """ - multiplier = 1.0 - temperament = breed_info.get('Temperament', '').lower() - care_level = breed_info.get('Care Level', 'MODERATE') - - # 新手飼主的特殊考慮,更寬容的評估標準 - if user_prefs.experience_level == 'beginner': - if care_level == 'HIGH': - if user_prefs.has_children: - multiplier *= 0.7 - else: - multiplier *= 0.8 - - # 性格特徵影響,降低懲罰程度 - challenging_traits = { - 'stubborn': -0.10, - 'independent': -0.08, - 'dominant': -0.08, - 'protective': -0.06, - 'aggressive': -0.15 - } - - for trait, penalty in challenging_traits.items(): - if trait in temperament: - multiplier *= (1 + penalty) - - return multiplier - - def evaluate_breed_specific_requirements() -> float: - """ - 評估品種特定需求。 - """ - multiplier = 1.0 - exercise_time = user_prefs.exercise_time - exercise_type = user_prefs.exercise_type - - # 檢查品種特性 - temperament = breed_info.get('Temperament', '').lower() - description 
= breed_info.get('Description', '').lower() - exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() - - # 運動需求匹配度評估,更合理的標準 - if exercise_needs == 'LOW': - if exercise_time > 120: - multiplier *= 0.85 - elif exercise_needs == 'VERY HIGH': - if exercise_time < 60: - multiplier *= 0.7 - - # 特殊品種類型的考慮 - if 'sprint' in temperament: - if exercise_time > 120 and exercise_type != 'active_training': - multiplier *= 0.85 - - if any(trait in temperament for trait in ['working', 'herding']): - if exercise_time < 90 or exercise_type == 'light_walks': - multiplier *= 0.8 - - return multiplier - - # 計算各項調整 - space_exercise_mult = evaluate_spatial_exercise_combination() - experience_mult = evaluate_experience_combination() - breed_specific_mult = evaluate_breed_specific_requirements() - - # 整合所有調整因素 - severity_multiplier *= space_exercise_mult - severity_multiplier *= experience_mult - severity_multiplier *= breed_specific_mult - - # 應用最終調整,確保分數在合理範圍內 - final_score = score * severity_multiplier - return max(0.2, min(1.0, final_score)) - - def calculate_base_score(scores: dict, weights: dict) -> float: - """ - 計算基礎評分分數 - 這個函數使用了改進後的評分邏輯: - 1. 降低關鍵指標的最低門檻,使系統更包容 - 2. 引入非線性評分曲線,讓分數分布更合理 - 3. 優化多重條件失敗的處理方式 - 4. 
加強對品種特性的考慮 - - Parameters: - scores: 包含各項評分的字典 - weights: 包含各項權重的字典 - - Returns: - float: 0.2到1.0之間的基礎分數 - """ - # 重新定義關鍵指標閾值,提供更寬容的評分標準 - critical_thresholds = { - 'space': 0.35, - 'exercise': 0.35, - 'experience': 0.5, - 'noise': 0.5 - } - - # 評估關鍵指標失敗情況 - def evaluate_critical_failures() -> list: - """ - 評估關鍵指標的失敗情況,但採用更寬容的標準。 - 根據品種特性動態調整失敗判定。 - """ - failures = [] - temperament = breed_info.get('Temperament', '').lower() - - for metric, threshold in critical_thresholds.items(): - if scores[metric] < threshold: - # 特殊情況處理:適應性強的品種可以有更低的空間要求 - if metric == 'space' and any(trait in temperament - for trait in ['adaptable', 'calm', 'apartment']): - if scores[metric] >= threshold - 0.1: - continue - - # 運動需求的特殊處理 - elif metric == 'exercise': - exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() - if exercise_needs == 'LOW' and scores[metric] >= threshold - 0.1: - continue - - failures.append((metric, scores[metric])) - - return failures - - # 計算基礎分數 - def calculate_weighted_score() -> float: - """ - 計算加權分數,使用非線性函數使分數分布更合理。 - """ - weighted_scores = [] - for key, score in scores.items(): - if key in weights: - # 使用sigmoid函數使分數曲線更平滑 - adjusted_score = 1 / (1 + math.exp(-10 * (score - 0.5))) - weighted_scores.append(adjusted_score * weights[key]) - - return sum(weighted_scores) - - # 處理臨界失敗情況 - critical_failures = evaluate_critical_failures() - base_score = calculate_weighted_score() - - if critical_failures: - # 分離空間和運動相關的懲罰 - space_exercise_penalty = 0 - other_penalty = 0 - - for metric, score in critical_failures: - if metric in ['space', 'exercise']: - # 降低空間和運動失敗的懲罰程度 - penalty = (critical_thresholds[metric] - score) * 0.08 - space_exercise_penalty += penalty - else: - # 其他失敗的懲罰保持較高 - penalty = (critical_thresholds[metric] - score) * 0.20 - other_penalty += penalty - - # 計算總懲罰,但使用更溫和的方式 - total_penalty = (space_exercise_penalty + other_penalty) / 2 - base_score *= (1 - total_penalty) - - # 多重失敗的處理更寬容 - if len(critical_failures) > 1: - # 
從0.98提升到0.99,降低多重失敗的疊加懲罰 - base_score *= (0.99 ** (len(critical_failures) - 1)) - - # 品種特性加分 - def apply_breed_bonus() -> float: - """ - 根據品種特性提供額外加分, - 特別是對於在特定環境下表現良好的品種。 - """ - bonus = 0 - temperament = breed_info.get('Temperament', '').lower() - description = breed_info.get('Description', '').lower() - - # 適應性加分 - adaptability_traits = ['adaptable', 'versatile', 'easy-going'] - if any(trait in temperament for trait in adaptability_traits): - bonus += 0.05 - - # 公寓適應性加分 - if user_prefs.living_space == 'apartment': - apartment_traits = ['calm', 'quiet', 'good for apartments'] - if any(trait in temperament or trait in description for trait in apartment_traits): - bonus += 0.05 - - return min(0.1, bonus) # 限制最大加分 - - # 應用品種特性加分 - breed_bonus = apply_breed_bonus() - base_score = min(1.0, base_score * (1 + breed_bonus)) - - # 確保最終分數在合理範圍內 - return max(0.2, min(1.0, base_score)) - - - def evaluate_condition_interactions(scores: dict) -> float: - """ - 評估不同條件間的相互影響,更寬容地處理極端組合 - """ - interaction_penalty = 1.0 - - # 只保留最基本的經驗相關評估 - if user_prefs.experience_level == 'beginner': - if breed_info.get('Care Level') == 'HIGH': - interaction_penalty *= 0.95 - - # 運動時間與類型的基本互動也降低懲罰程度 - exercise_needs = breed_info.get('Exercise Needs', 'MODERATE').upper() - if exercise_needs == 'VERY HIGH' and user_prefs.exercise_type == 'light_walks': - interaction_penalty *= 0.95 - - return interaction_penalty - - def calculate_adjusted_perfect_bonus(perfect_conditions: dict) -> float: - """ - 計算完美匹配獎勵,但更注重條件的整體表現。 - """ - bonus = 1.0 - - # 降低單項獎勵的影響力 - bonus += 0.06 * perfect_conditions['size_match'] - bonus += 0.06 * perfect_conditions['exercise_match'] - bonus += 0.06 * perfect_conditions['experience_match'] - bonus += 0.03 * perfect_conditions['living_condition_match'] - - # 如果有任何條件表現不佳,降低整體獎勵 - low_scores = [score for score in perfect_conditions.values() if score < 0.6] - if low_scores: - bonus *= (0.85 ** len(low_scores)) - - # 確保獎勵不會過高 - return min(1.25, bonus) - - def 
apply_breed_specific_adjustments(score: float) -> float: - """ - 根據品種特性進行最終調整。 - 考慮品種的特殊性質和限制因素。 - """ - # 檢查是否存在極端不匹配的情況 - exercise_mismatch = False - size_mismatch = False - experience_mismatch = False - - # 運動需求極端不匹配 - if breed_info.get('Exercise Needs', 'MODERATE').upper() == 'VERY HIGH': - if user_prefs.exercise_time < 90 or user_prefs.exercise_type == 'light_walks': - exercise_mismatch = True - - # 體型與空間極端不匹配 - if user_prefs.living_space == 'apartment' and breed_info['Size'] in ['Large', 'Giant']: - size_mismatch = True - - # 經驗需求極端不匹配 - if user_prefs.experience_level == 'beginner' and breed_info.get('Care Level') == 'HIGH': - experience_mismatch = True - - # 根據不匹配的數量進行懲罰 - mismatch_count = sum([exercise_mismatch, size_mismatch, experience_mismatch]) - if mismatch_count > 0: - score *= (0.8 ** mismatch_count) - - return score - - # 計算動態權重 - weights = calculate_weights() - - # 正規化權重 - total_weight = sum(weights.values()) - normalized_weights = {k: v/total_weight for k, v in weights.items()} - - # 計算基礎分數 - base_score = calculate_base_score(scores, normalized_weights) - - # 評估條件互動 - interaction_multiplier = evaluate_condition_interactions(scores) - - # 計算完美匹配獎勵 - perfect_conditions = evaluate_perfect_conditions() - perfect_bonus = calculate_adjusted_perfect_bonus(perfect_conditions) - - # 計算初步分數 - preliminary_score = base_score * interaction_multiplier * perfect_bonus - - # 應用品種特定調整 - final_score = apply_breed_specific_adjustments(preliminary_score) - - # 確保分數在合理範圍內,並降低最高可能分數 - max_possible_score = 0.96 # 降低最高可能分數 - min_possible_score = 0.3 - - return min(max_possible_score, max(min_possible_score, final_score)) + """計算品種相容性總分""" + return _score_manager.calculate_breed_compatibility_score(scores, user_prefs, breed_info) def amplify_score_extreme(score: float) -> float: - """ + """ + 優化分數分布,提供更有意義的評分範圍。 + 純粹進行數學轉換,不依賴外部資訊。 + Parameters: score: 原始評分(0-1之間的浮點數) - + Returns: float: 調整後的評分(0-1之間的浮點數) """ - def smooth_curve(x: float, steepness: float = 12) -> float: - 
"""創建平滑的S型曲線用於分數轉換""" - import math - return 1 / (1 + math.exp(-steepness * (x - 0.5))) - - # 90-100分的轉換(極佳匹配) - if score >= 0.90: - position = (score - 0.90) / 0.10 - return 0.96 + (position * 0.04) - - # 80-90分的轉換(優秀匹配) - elif score >= 0.80: - position = (score - 0.80) / 0.10 - return 0.90 + (position * 0.06) - - # 70-80分的轉換(良好匹配) - elif score >= 0.70: - position = (score - 0.70) / 0.10 - return 0.82 + (position * 0.08) - - # 50-70分的轉換(可接受匹配) - elif score >= 0.50: - position = (score - 0.50) / 0.20 - return 0.75 + (smooth_curve(position) * 0.07) - - # 50分以下的轉換(較差匹配) - else: - position = score / 0.50 - return 0.70 + (smooth_curve(position) * 0.05) - - return round(min(1.0, max(0.0, score)), 4) \ No newline at end of file + return _bonus_engine.amplify_score_extreme(score) + + +class UnifiedScoringSystem: + """統一評分系統核心類""" + + def __init__(self): + """初始化評分系統""" + self.dimension_weights = { + 'space_compatibility': 0.30, # Increased from 0.25 + 'exercise_compatibility': 0.25, # Increased from 0.20 + 'grooming_compatibility': 0.10, # Reduced from 0.15 + 'experience_compatibility': 0.10, # Reduced from 0.15 + 'noise_compatibility': 0.15, # Adjusted + 'family_compatibility': 0.10 # Added + } + random.seed(42) # 確保一致性 + + def calculate_space_compatibility(self, breed_info: Dict, user_prefs: UserPreferences) -> DimensionalScore: + """計算空間適配性分數""" + breed_size = breed_info.get('Size', 'Medium').lower() + living_space = user_prefs.living_space + yard_access = user_prefs.yard_access + + # 基礎空間評分邏輯 + space_score = 0.5 # 基礎分數 + explanation_parts = [] + + # Enhanced size-space matrix with stricter penalties + size_space_matrix = { + 'apartment': { + 'toy': 0.95, 'small': 0.90, 'medium': 0.50, # Reduced medium score + 'large': 0.15, 'giant': 0.05 # Severe penalties for large/giant + }, + 'house_small': { + 'toy': 0.85, 'small': 0.90, 'medium': 0.85, + 'large': 0.60, 'giant': 0.30 # Still penalize giant breeds + }, + 'house_medium': { # Added for medium houses + 'toy': 0.80, 
'small': 0.85, 'medium': 0.95, + 'large': 0.85, 'giant': 0.60 # Giants still not ideal + }, + 'house_large': { + 'toy': 0.75, 'small': 0.80, 'medium': 0.90, + 'large': 0.95, 'giant': 0.95 + } + } + + # Determine actual living space category + if 'apartment' in living_space or 'small' in living_space: + space_category = 'apartment' + elif 'medium' in living_space: + space_category = 'house_medium' + elif 'large' in living_space: + space_category = 'house_large' + else: + space_category = 'house_small' + + # Get base score from matrix + base_score = size_space_matrix[space_category].get( + self._normalize_size(breed_size), 0.5 + ) + + # Apply additional penalties for exercise needs in small spaces + if space_category == 'apartment': + exercise_needs = breed_info.get('Exercise Needs', '').lower() + if 'high' in exercise_needs: + base_score *= 0.7 # 30% additional penalty + if 'very high' in exercise_needs: + base_score *= 0.5 # 50% additional penalty + + space_score = base_score + explanation_parts = [] + if base_score < 0.3: + explanation_parts.append(f"Poor match: {breed_size} dog in {space_category}") + elif base_score < 0.7: + explanation_parts.append(f"Moderate match: {breed_size} dog in {space_category}") + else: + explanation_parts.append(f"Good match: {breed_size} dog in {space_category}") + + # 院子需求調整 + if yard_access == 'private_yard': + space_score = min(1.0, space_score + 0.1) + explanation_parts.append("Private yard bonus") + elif yard_access == 'no_yard' and breed_size in ['large', 'giant']: + space_score *= 0.7 + explanation_parts.append("Large dog without yard penalty") + + # 運動需求考量 + exercise_needs = breed_info.get('Exercise Needs', 'Moderate').lower() + if exercise_needs in ['high', 'very high'] and living_space == 'apartment': + space_score *= 0.8 + explanation_parts.append("High exercise needs in apartment limitation") + + explanation = "; ".join(explanation_parts) + + return DimensionalScore( + dimension_name='space_compatibility', + 
raw_score=space_score, + weight=self.dimension_weights['space_compatibility'], + display_score=space_score, + explanation=explanation + ) + + def calculate_exercise_compatibility(self, breed_info: Dict, user_prefs: UserPreferences) -> DimensionalScore: + """計算運動適配性分數""" + breed_exercise_needs = breed_info.get('Exercise Needs', 'Moderate').lower() + user_exercise_time = user_prefs.exercise_time + user_exercise_type = user_prefs.exercise_type + + # 運動需求映射 + exercise_requirements = { + 'low': {'min_time': 20, 'ideal_time': 30}, + 'moderate': {'min_time': 45, 'ideal_time': 60}, + 'high': {'min_time': 90, 'ideal_time': 120}, + 'very high': {'min_time': 120, 'ideal_time': 180} + } + + breed_req = exercise_requirements.get(breed_exercise_needs, exercise_requirements['moderate']) + + # 基礎時間匹配度 + if user_exercise_time >= breed_req['ideal_time']: + time_score = 1.0 + time_explanation = "Sufficient exercise time" + elif user_exercise_time >= breed_req['min_time']: + time_score = 0.7 + 0.3 * (user_exercise_time - breed_req['min_time']) / (breed_req['ideal_time'] - breed_req['min_time']) + time_explanation = "Exercise time meets basic requirements" + else: + time_score = 0.3 * user_exercise_time / breed_req['min_time'] + time_explanation = "Insufficient exercise time" + + # Enhanced compatibility matrix + breed_level = self._parse_exercise_level(breed_exercise_needs) + user_level = self._get_user_exercise_level(user_exercise_time) + + compatibility_matrix = { + ('low', 'low'): 1.0, + ('low', 'moderate'): 0.85, + ('low', 'high'): 0.40, # Stronger penalty + ('low', 'very high'): 0.15, # Severe penalty + ('moderate', 'low'): 0.70, + ('moderate', 'moderate'): 1.0, + ('moderate', 'high'): 0.85, + ('moderate', 'very high'): 0.60, + ('high', 'low'): 0.20, # Severe penalty + ('high', 'moderate'): 0.65, + ('high', 'high'): 1.0, + ('high', 'very high'): 0.90, + } + + base_score = compatibility_matrix.get((user_level, breed_level), 0.5) + + # Check for exercise type compatibility + if 
hasattr(user_prefs, 'exercise_type'): + exercise_type_bonus = self._calculate_exercise_type_match( + breed_info, user_prefs.exercise_type + ) + base_score = base_score * 0.8 + exercise_type_bonus * 0.2 + + exercise_score = base_score + + explanation = f"{user_level} user with {breed_level} exercise breed" + + return DimensionalScore( + dimension_name='exercise_compatibility', + raw_score=exercise_score, + weight=self.dimension_weights['exercise_compatibility'], + display_score=exercise_score, + explanation=explanation + ) + + def _normalize_size(self, breed_size: str) -> str: + """Normalize breed size string""" + breed_size = breed_size.lower() + if 'giant' in breed_size: + return 'giant' + elif 'large' in breed_size: + return 'large' + elif 'medium' in breed_size: + return 'medium' + elif 'small' in breed_size: + return 'small' + elif 'toy' in breed_size or 'tiny' in breed_size: + return 'toy' + else: + return 'medium' + + def _parse_exercise_level(self, exercise_description: str) -> str: + """Parse exercise level from description""" + exercise_lower = exercise_description.lower() + if any(term in exercise_lower for term in ['very high', 'extremely high', 'intense']): + return 'very high' + elif 'high' in exercise_lower: + return 'high' + elif any(term in exercise_lower for term in ['low', 'minimal']): + return 'low' + else: + return 'moderate' + + def _get_user_exercise_level(self, minutes: int) -> str: + """Convert exercise minutes to level""" + if minutes < 30: + return 'low' + elif minutes < 60: + return 'moderate' + else: + return 'high' + + def _calculate_exercise_type_match(self, breed_info: Dict, user_type: str) -> float: + """Calculate exercise type compatibility""" + breed_description = str(breed_info.get('Exercise Needs', '')).lower() + + if user_type == 'active_training': + if any(term in breed_description for term in ['agility', 'working', 'herding']): + return 1.0 + elif 'sprint' in breed_description: + return 0.6 # Afghan Hound case + elif user_type 
== 'light_walks': + if any(term in breed_description for term in ['gentle', 'moderate', 'light']): + return 1.0 + elif any(term in breed_description for term in ['intense', 'vigorous']): + return 0.3 + + return 0.7 # Default moderate match + + def calculate_unified_breed_score(self, breed_name: str, user_prefs: UserPreferences) -> UnifiedBreedScore: + """計算統一品種分數""" + # 獲取品種資訊 + try: + breed_info = get_dog_description(breed_name.replace(' ', '_')) + except ImportError: + breed_info = None + + if not breed_info: + return self._get_default_breed_score(breed_name) + + breed_info['breed_name'] = breed_name + + # 計算各維度分數 (簡化版,包含主要維度) + dimensional_scores = [ + self.calculate_space_compatibility(breed_info, user_prefs), + self.calculate_exercise_compatibility(breed_info, user_prefs) + ] + + # 計算加權總分 + weighted_sum = sum(score.raw_score * score.weight for score in dimensional_scores) + total_weight = sum(score.weight for score in dimensional_scores) + base_overall_score = weighted_sum / total_weight if total_weight > 0 else 0.5 + + # 計算加分和扣分因素 + bonus_factors = {} + penalty_factors = {} + + # 應用加分扣分 + overall_score = max(0.0, min(1.0, base_overall_score)) + + return UnifiedBreedScore( + breed_name=breed_name, + overall_score=overall_score, + dimensional_scores=dimensional_scores, + bonus_factors=bonus_factors, + penalty_factors=penalty_factors, + confidence_level=0.8, + match_explanation=f"Breed assessment for {breed_name} based on unified scoring system", + warnings=[] + ) + + def _get_default_breed_score(self, breed_name: str) -> UnifiedBreedScore: + """獲取預設品種分數""" + default_dimensional_scores = [ + DimensionalScore('space_compatibility', 0.6, 0.25, 0.6, 'Insufficient information'), + DimensionalScore('exercise_compatibility', 0.6, 0.20, 0.6, 'Insufficient information') + ] + + return UnifiedBreedScore( + breed_name=breed_name, + overall_score=0.6, + dimensional_scores=default_dimensional_scores, + bonus_factors={}, + penalty_factors={}, + confidence_level=0.3, + 
match_explanation="Insufficient data available, recommend further research on this breed", + warnings=["Incomplete breed information, scores are for reference only"] + ) + + +def calculate_unified_breed_scores(breed_list: List[str], user_prefs: UserPreferences) -> List[UnifiedBreedScore]: + """計算多個品種的統一分數""" + scoring_system = UnifiedScoringSystem() + scores = [] + + for breed in breed_list: + breed_score = scoring_system.calculate_unified_breed_score(breed, user_prefs) + scores.append(breed_score) + + # 按總分排序 + scores.sort(key=lambda x: x.overall_score, reverse=True) + + return scores diff --git a/semantic_breed_recommender.py b/semantic_breed_recommender.py new file mode 100644 index 0000000000000000000000000000000000000000..6eac93e08516c7ec6fcf14ed4d3cf141783e19a4 --- /dev/null +++ b/semantic_breed_recommender.py @@ -0,0 +1,2215 @@ +import random +import hashlib +import numpy as np +import sqlite3 +import re +import traceback +from typing import List, Dict, Tuple, Optional, Any +from dataclasses import dataclass +from sentence_transformers import SentenceTransformer +import torch +from sklearn.metrics.pairwise import cosine_similarity +from dog_database import get_dog_description +from breed_health_info import breed_health_info +from breed_noise_info import breed_noise_info +from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores +from query_understanding import QueryUnderstandingEngine, analyze_user_query +from constraint_manager import ConstraintManager, apply_breed_constraints +from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore +from score_calibrator import ScoreCalibrator, calibrate_breed_scores +from config_manager import get_config_manager, get_standardized_breed_data + +@dataclass +class BreedDescriptionVector: + """Data structure for breed description vectorization""" + breed_name: str + description_text: str + embedding: np.ndarray + 
characteristics: Dict[str, Any] + +class SemanticBreedRecommender: + """ + Enhanced SBERT-based semantic breed recommendation system + Provides multi-dimensional natural language understanding for dog breed recommendations + """ + + def __init__(self): + """Initialize the semantic recommender""" + self.model_name = 'all-MiniLM-L6-v2' # Efficient SBERT model + self.sbert_model = None + self.breed_vectors = {} + self.breed_list = self._get_breed_list() + self.comparative_keywords = { + 'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8, + 'then': 0.7, 'second': 0.7, 'followed': 0.6, + 'third': 0.5, 'least': 0.3, 'dislike': 0.2 + } + # self.query_engine = QueryUnderstandingEngine() + # self.constraint_manager = ConstraintManager() + # self.multi_head_scorer = None # Will be initialized with SBERT model + # self.score_calibrator = ScoreCalibrator() + # self.config_manager = get_config_manager() + self._initialize_model() + self._build_breed_vectors() + + # Initialize multi-head scorer with SBERT model if enhanced mode is enabled + # if self.sbert_model: + # self.multi_head_scorer = MultiHeadScorer(self.sbert_model) + # print("Multi-head scorer initialized with SBERT model") + + def _get_breed_list(self) -> List[str]: + """Get breed list from database""" + try: + conn = sqlite3.connect('animal_detector.db') + cursor = conn.cursor() + cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog") + breeds = [row[0] for row in cursor.fetchall()] + cursor.close() + conn.close() + return breeds + except Exception as e: + print(f"Error getting breed list: {str(e)}") + # Backup breed list for Google Colab environment + return ['Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', + 'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier'] + + def _initialize_model(self): + """Initialize SBERT model with fallback""" + try: + print("Loading SBERT model...") + # Try different model names if the primary one fails + model_options = ['all-MiniLM-L6-v2', 
'all-mpnet-base-v2', 'all-MiniLM-L12-v2'] + + for model_name in model_options: + try: + self.sbert_model = SentenceTransformer(model_name) + self.model_name = model_name + print(f"SBERT model {model_name} loaded successfully") + return + except Exception as model_e: + print(f"Failed to load {model_name}: {str(model_e)}") + continue + + # If all models fail + print("All SBERT models failed to load. Using basic text matching fallback.") + self.sbert_model = None + + except Exception as e: + print(f"Failed to initialize any SBERT model: {str(e)}") + print(traceback.format_exc()) + print("Will provide basic text-based recommendations without embeddings") + self.sbert_model = None + + def _create_breed_description(self, breed: str) -> str: + """Create comprehensive natural language description for breed with all key characteristics""" + try: + # Get all information sources + breed_info = get_dog_description(breed) or {} + health_info = breed_health_info.get(breed, {}) if breed_health_info else {} + noise_info = breed_noise_info.get(breed, {}) if breed_noise_info else {} + + breed_display_name = breed.replace('_', ' ') + description_parts = [] + + # 1. Basic size and physical characteristics + size = breed_info.get('Size', 'medium').lower() + description_parts.append(f"{breed_display_name} is a {size} sized dog breed") + + # 2. Temperament and personality (critical for matching) + temperament = breed_info.get('Temperament', '') + if temperament: + description_parts.append(f"with a {temperament.lower()} temperament") + + # 3. 
Exercise and activity level (critical for apartment living) + exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower() + if 'high' in exercise_needs or 'very high' in exercise_needs: + description_parts.append("requiring high daily exercise and mental stimulation") + elif 'low' in exercise_needs or 'minimal' in exercise_needs: + description_parts.append("with minimal exercise requirements, suitable for apartment living") + else: + description_parts.append("with moderate exercise needs") + + # 4. Noise characteristics (critical for quiet requirements) + noise_level = noise_info.get('noise_level', 'moderate').lower() + if 'low' in noise_level or 'quiet' in noise_level: + description_parts.append("known for being quiet and rarely barking") + elif 'high' in noise_level or 'loud' in noise_level: + description_parts.append("tends to be vocal and bark frequently") + else: + description_parts.append("with moderate barking tendencies") + + # 5. Living space compatibility + if size in ['small', 'tiny']: + description_parts.append("excellent for small apartments and limited spaces") + elif size in ['large', 'giant']: + description_parts.append("requiring large living spaces and preferably a yard") + else: + description_parts.append("adaptable to various living situations") + + # 6. Grooming and maintenance + grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower() + if 'high' in grooming_needs: + description_parts.append("requiring regular professional grooming") + elif 'low' in grooming_needs: + description_parts.append("with minimal grooming requirements") + else: + description_parts.append("with moderate grooming needs") + + # 7. Family compatibility + good_with_children = breed_info.get('Good with Children', 'Yes') + if good_with_children == 'Yes': + description_parts.append("excellent with children and families") + else: + description_parts.append("better suited for adult households") + + # 8. 
Intelligence and trainability (from database description) + intelligence_keywords = [] + description_text = breed_info.get('Description', '').lower() + + if description_text: + # Extract intelligence indicators from description + if any(word in description_text for word in ['intelligent', 'smart', 'clever', 'quick to learn']): + intelligence_keywords.extend(['highly intelligent', 'trainable', 'quick learner']) + elif any(word in description_text for word in ['stubborn', 'independent', 'difficult to train']): + intelligence_keywords.extend(['independent minded', 'requires patience', 'challenging to train']) + else: + intelligence_keywords.extend(['moderate intelligence', 'trainable with consistency']) + + # Extract working/purpose traits from description + if any(word in description_text for word in ['working', 'herding', 'guard', 'hunting']): + intelligence_keywords.extend(['working breed', 'purpose-driven', 'task-oriented']) + elif any(word in description_text for word in ['companion', 'lap', 'toy', 'decorative']): + intelligence_keywords.extend(['companion breed', 'affectionate', 'people-focused']) + + # Add intelligence context to description + if intelligence_keywords: + description_parts.append(f"characterized as {', '.join(intelligence_keywords[:2])}") + + # 9. 
Special characteristics and purpose (enhanced with database mining) + if breed_info.get('Description'): + desc = breed_info.get('Description', '')[:150] # Increased to 150 chars for more context + if desc: + # Extract key traits from description for better semantic matching + desc_lower = desc.lower() + key_traits = [] + + # Extract key behavioral traits from description + if 'friendly' in desc_lower: + key_traits.append('friendly') + if 'gentle' in desc_lower: + key_traits.append('gentle') + if 'energetic' in desc_lower or 'active' in desc_lower: + key_traits.append('energetic') + if 'calm' in desc_lower or 'peaceful' in desc_lower: + key_traits.append('calm') + if 'protective' in desc_lower or 'guard' in desc_lower: + key_traits.append('protective') + + trait_text = f" and {', '.join(key_traits)}" if key_traits else "" + description_parts.append(f"Known for: {desc.lower()}{trait_text}") + + # 9. Care level requirements + try: + care_level = breed_info.get('Care Level', 'moderate') + if isinstance(care_level, str): + description_parts.append(f"requiring {care_level.lower()} overall care level") + else: + description_parts.append("requiring moderate overall care level") + except Exception as e: + print(f"Error processing care level for {breed}: {str(e)}") + description_parts.append("requiring moderate overall care level") + + # 10. Lifespan information + try: + lifespan = breed_info.get('Lifespan', '10-12 years') + if lifespan and isinstance(lifespan, str) and lifespan.strip(): + description_parts.append(f"with a typical lifespan of {lifespan}") + else: + description_parts.append("with a typical lifespan of 10-12 years") + except Exception as e: + print(f"Error processing lifespan for {breed}: {str(e)}") + description_parts.append("with a typical lifespan of 10-12 years") + + # Create comprehensive description + full_description = '. '.join(description_parts) + '.' 
+ + # Add comprehensive keywords for better semantic matching + keywords = [] + + # Basic breed name keywords + keywords.extend([word.lower() for word in breed_display_name.split()]) + + # Temperament keywords + if temperament: + keywords.extend([word.lower().strip(',') for word in temperament.split()]) + + # Size-based keywords + if 'small' in size or 'tiny' in size: + keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap']) + elif 'large' in size or 'giant' in size: + keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor']) + else: + keywords.extend(['medium', 'moderate', 'average', 'balanced']) + + # Activity level keywords + exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower() + if 'high' in exercise_needs: + keywords.extend(['active', 'energetic', 'exercise', 'outdoor', 'hiking', 'running', 'athletic']) + elif 'low' in exercise_needs: + keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary']) + else: + keywords.extend(['moderate', 'balanced', 'walks', 'regular']) + + # Noise level keywords + noise_level = noise_info.get('noise_level', 'moderate').lower() + if 'quiet' in noise_level or 'low' in noise_level: + keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise']) + elif 'high' in noise_level or 'loud' in noise_level: + keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog']) + + # Living situation keywords + if size in ['small', 'tiny'] and 'low' in exercise_needs: + keywords.extend(['apartment', 'city', 'urban', 'small-space']) + if size in ['large', 'giant'] or 'high' in exercise_needs: + keywords.extend(['house', 'yard', 'suburban', 'rural', 'space']) + + # Family keywords + good_with_children = breed_info.get('Good with Children', 'Yes') + if good_with_children == 'Yes': + keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle']) + + # Intelligence and trainability keywords (from database description mining) + if 
intelligence_keywords: + keywords.extend([word.lower() for phrase in intelligence_keywords for word in phrase.split()]) + + # Grooming-based keywords (enhanced) + grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower() + if 'high' in grooming_needs: + keywords.extend(['high-maintenance', 'professional-grooming', 'daily-brushing', 'coat-care']) + elif 'low' in grooming_needs: + keywords.extend(['low-maintenance', 'minimal-grooming', 'easy-care', 'wash-and-go']) + else: + keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care']) + + # Lifespan-based keywords + lifespan = breed_info.get('Lifespan', '10-12 years') + if lifespan and isinstance(lifespan, str): + try: + # Extract years from lifespan string (e.g., "10-12 years" or "12-15 years") + import re + years = re.findall(r'\d+', lifespan) + if years: + avg_years = sum(int(y) for y in years) / len(years) + if avg_years >= 14: + keywords.extend(['long-lived', 'longevity', 'durable', 'healthy-lifespan']) + elif avg_years <= 8: + keywords.extend(['shorter-lifespan', 'health-considerations', 'special-care']) + else: + keywords.extend(['average-lifespan', 'moderate-longevity']) + except: + keywords.extend(['average-lifespan']) + + # Add keywords to description for better semantic matching + unique_keywords = list(set(keywords)) + keyword_text = ' '.join(unique_keywords) + full_description += f" Additional context: {keyword_text}" + + return full_description + + except Exception as e: + print(f"Error creating description for {breed}: {str(e)}") + return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics." 
def _build_breed_vectors(self):
    """Populate ``self.breed_vectors`` with one embedded record per breed.

    For every entry of ``self.breed_list`` a text description is built via
    ``self._create_breed_description``, encoded with ``self.sbert_model`` and
    stored as a ``BreedDescriptionVector`` together with a few database
    characteristics. Does nothing (beyond logging) when no SBERT model is set.

    Raises:
        Exception: any failure is logged with its traceback and re-raised.
    """
    try:
        print("Building breed vector database...")

        # Skip if model is not available
        if self.sbert_model is None:
            print("SBERT model not available, skipping vector building")
            return

        for breed in self.breed_list:
            description = self._create_breed_description(breed)
            embedding = self.sbert_model.encode(description, convert_to_tensor=False)

            # A missing database entry falls back to neutral defaults.
            breed_info = get_dog_description(breed) or {}
            characteristics = {
                'size': breed_info.get('Size', 'Medium'),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Yes'),
                'temperament': breed_info.get('Temperament', ''),
            }

            self.breed_vectors[breed] = BreedDescriptionVector(
                breed_name=breed,
                description_text=description,
                embedding=embedding,
                characteristics=characteristics
            )

        print(f"Successfully built {len(self.breed_vectors)} breed vectors")

    except Exception as e:
        print(f"Error building breed vectors: {str(e)}")
        print(traceback.format_exc())
        raise


def _parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
    """Score breeds that the user mentions by name.

    A breed counts as mentioned when any word of its display name occurs as a
    substring of the lower-cased input. Its score starts at 0.5 and is raised
    to the highest value in ``self.comparative_keywords`` whose keyword occurs
    within 50 characters of the matched word.

    Args:
        user_input: Free-text description supplied by the user.

    Returns:
        Mapping of breed identifier (as stored in ``self.breed_list``) to its
        preference score. Breeds that are not mentioned are absent.
    """
    breed_scores = {}
    text = user_input.lower()

    for breed in self.breed_list:
        breed_words = breed.replace('_', ' ').lower().split()

        # Anchor on the word that actually matched. Anchoring on the first
        # word of the name (as before) missed e.g. "retriever" for
        # "golden retriever" and silently skipped the keyword scan.
        anchor = next((text.find(word) for word in breed_words if word in text), -1)
        if anchor == -1:
            continue

        breed_score = 0.5  # Default score for a mentioned breed
        context = text[max(0, anchor - 50):min(len(text), anchor + 50)]
        for keyword, score in self.comparative_keywords.items():
            if keyword in context:
                breed_score = max(breed_score, score)

        breed_scores[breed] = breed_score

    return breed_scores


def _extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
    """Tag the user's text with lifestyle labels using substring matching.

    Each rule below is ``(category, label, terms)``: when any term occurs in
    the lower-cased input, ``label`` is appended to ``category``. Rules are
    evaluated in order, so labels inside a category keep a fixed order.

    NOTE(review): matching is by plain substring, so short terms fire inside
    longer words ('old' in 'golden', 'experienced' in 'inexperienced',
    'grooming' in 'minimal grooming'). Left unchanged here because callers'
    scoring was tuned against this behaviour.

    Args:
        user_input: Free-text description supplied by the user.

    Returns:
        Dict with a fixed set of category keys, each mapping to the (possibly
        empty) list of labels detected for that category.
    """
    keywords = {
        'living_space': [],
        'activity_level': [],
        'family_situation': [],
        'noise_preference': [],
        'size_preference': [],
        'care_level': [],
        'special_needs': [],
        'intelligence_preference': [],
        'grooming_preference': [],
        'lifespan_preference': [],
        'temperament_preference': [],
        'experience_level': []
    }

    rules = (
        # Living space
        ('living_space', 'apartment',
         ['apartment', 'flat', 'condo', 'small space', 'city living', 'urban', 'no yard', 'indoor']),
        ('living_space', 'house',
         ['house', 'yard', 'garden', 'backyard', 'large space', 'suburban', 'rural', 'farm']),
        # Activity level
        ('activity_level', 'high',
         ['active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor', 'sports', 'jogging',
          'athletic', 'adventure', 'vigorous', 'high energy', 'workout']),
        ('activity_level', 'low',
         ['calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed',
          'peaceful', 'quiet lifestyle', 'minimal exercise']),
        ('activity_level', 'moderate',
         ['moderate', 'walk', 'daily walks', 'light exercise']),
        # Family situation
        ('family_situation', 'children',
         ['children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage', 'school age']),
        ('family_situation', 'elderly',
         ['elderly', 'senior', 'old', 'retirement', 'aged', 'mature']),
        ('family_situation', 'single',
         ['single', 'alone', 'individual', 'solo', 'myself']),
        # Noise preference
        ('noise_preference', 'low',
         ['quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise',
          'soft-spoken', 'calm', 'tranquil']),
        ('noise_preference', 'high',
         ['loud', 'barking ok', 'noise tolerant', 'vocal', "doesn't matter"]),
        # Size preference
        ('size_preference', 'small',
         ['small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog']),
        ('size_preference', 'large',
         ['large', 'big', 'giant', 'huge', 'massive', 'great']),
        ('size_preference', 'medium',
         ['medium', 'moderate size', 'average', 'mid-sized']),
        # Care level
        ('care_level', 'low',
         ['low maintenance', 'easy care', 'simple', 'minimal grooming', 'wash and go']),
        ('care_level', 'high',
         ['high maintenance', 'grooming', 'care intensive', 'professional grooming', 'daily brushing']),
        # Intelligence preference
        ('intelligence_preference', 'high',
         ['smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train',
          'trainable', 'genius', 'brilliant']),
        ('intelligence_preference', 'independent',
         ['independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves']),
        # Grooming preference
        ('grooming_preference', 'low',
         ['low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat']),
        ('grooming_preference', 'high',
         ['high grooming', 'professional grooming', 'lots of care', 'high maintenance coat',
          'daily brushing', 'regular grooming']),
        # Lifespan preference
        ('lifespan_preference', 'long',
         ['long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity']),
        ('lifespan_preference', 'healthy',
         ['healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution']),
        # Temperament preference
        ('temperament_preference', 'gentle',
         ['gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile']),
        ('temperament_preference', 'playful',
         ['playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy']),
        ('temperament_preference', 'protective',
         ['protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive']),
        ('temperament_preference', 'friendly',
         ['friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious']),
        # Experience level
        ('experience_level', 'beginner',
         ['first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced']),
        ('experience_level', 'advanced',
         ['experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned']),
        # Special needs
        ('special_needs', 'guard',
         ['guard', 'protection', 'security', 'watchdog', 'protective', 'defender']),
        ('special_needs', 'companion',
         ['therapy', 'emotional support', 'companion', 'comfort', 'lap dog', 'cuddly']),
        ('special_needs', 'hypoallergenic',
         ['hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly', 'no shed']),
        ('special_needs', 'multi_pet',
         ['good with cats', 'cat friendly', 'multi-pet', 'other animals']),
    )

    text = user_input.lower()
    for category, label, terms in rules:
        if any(term in text for term in terms):
            keywords[category].append(label)

    return keywords


def _apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]:
    """Rebalance a result list that is dominated by large and giant breeds.

    Lists with fewer than 10 entries, or where large + giant breeds make up
    at most 70% of the entries, are returned unchanged. Otherwise entries are
    taken in their existing order up to a per-size quota, and the entries that
    did not fit are appended afterwards until 15 entries are reached.

    Args:
        recommendations: Recommendation dicts, each carrying a ``'breed'`` key.

    Returns:
        The original list object when no correction applies, else a new list
        holding the same dict objects in corrected order.
    """
    if len(recommendations) < 10:
        return recommendations

    def size_category(rec):
        # Sibling methods look breeds up by their underscore identifier; do
        # the same here so display names containing spaces still resolve.
        info = get_dog_description(rec['breed'].replace(' ', '_'))
        if not info:
            return None
        return self._normalize_breed_size(info.get('Size', 'Medium'))

    # One database lookup per recommendation, reused by both passes below.
    sizes = [size_category(rec) for rec in recommendations]

    large_giant = sum(1 for size in sizes if size in ('large', 'giant'))
    large_giant_ratio = large_giant / len(recommendations)

    # Only correct when more than 70% are large/giant breeds
    if large_giant_ratio <= 0.7:
        return recommendations

    size_quotas = {'toy': 2, 'small': 4, 'medium': 6, 'large': 2, 'giant': 1}
    current_counts = dict.fromkeys(size_quotas, 0)

    within_quota = []
    overflow = []
    for rec, size in zip(recommendations, sizes):
        if size is not None and current_counts[size] < size_quotas[size]:
            within_quota.append(rec)
            current_counts[size] += 1
        else:
            overflow.append(rec)

    # The quotas sum to 15, so this is never negative.
    remaining_slots = 15 - len(within_quota)
    within_quota.extend(overflow[:remaining_slots])
    return within_quota


def _normalize_breed_size(self, size: str) -> str:
    """Map a free-form size description onto a standard category.

    Args:
        size: Size text from the breed database; non-strings are tolerated.

    Returns:
        One of ``'toy'``, ``'small'``, ``'medium'``, ``'large'``, ``'giant'``.
        Unrecognised or non-string input yields ``'medium'``.
    """
    if not isinstance(size, str):
        return 'medium'

    size_lower = size.lower()
    if any(term in size_lower for term in ['toy', 'tiny']):
        return 'toy'
    if 'small' in size_lower:
        return 'small'
    if 'medium' in size_lower:
        return 'medium'
    # Must precede the 'large' test: 'extra large' contains 'large' and would
    # otherwise never be classified as giant.
    if any(term in size_lower for term in ['giant', 'extra large']):
        return 'giant'
    if 'large' in size_lower:
        return 'large'
    return 'medium'


def _parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
    """Derive coarse requirements from the user's text by substring tests.

    Args:
        user_input: Free-text description supplied by the user.

    Returns:
        Dict with keys ``'living_space'``, ``'exercise_level'``,
        ``'preferred_size'`` and ``'noise_tolerance'``. A value is ``None``
        when nothing was detected; ``'noise_tolerance'`` is never set by this
        method.
    """
    requirements = {
        'living_space': None,
        'exercise_level': None,
        'preferred_size': None,
        'noise_tolerance': None
    }

    input_lower = user_input.lower()

    def mentions(*terms):
        return any(term in input_lower for term in terms)

    # Living space detection
    if mentions('apartment', 'small'):
        requirements['living_space'] = 'apartment'
    elif mentions('large house', 'big'):
        requirements['living_space'] = 'large_house'
    elif mentions('medium'):
        requirements['living_space'] = 'medium_house'

    # Exercise level detection
    if mentions("don't exercise", 'low exercise'):
        requirements['exercise_level'] = 'low'
    elif mentions('hiking', 'running', 'active'):
        requirements['exercise_level'] = 'high'
    elif mentions('30 minutes', 'moderate'):
        requirements['exercise_level'] = 'moderate'

    # Size preference detection
    if mentions('small dog', 'tiny', 'toy'):
        requirements['preferred_size'] = 'small'
    elif mentions('large dog', 'big dog'):
        requirements['preferred_size'] = 'large'
    elif mentions('medium'):
        requirements['preferred_size'] = 'medium'

    return requirements


def _apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float:
    """Compute a penalty for breeds that conflict with stated constraints.

    Args:
        breed: Breed identifier used to look up database information.
        user_input: Free-text description supplied by the user.
        breed_characteristics: Accepted for interface compatibility; not read
            by this method.

    Returns:
        ``-2.0`` when the breed must be eliminated, otherwise the accumulated
        penalty (zero or negative). ``0.0`` when the breed has no database
        entry.
    """
    user_text_lower = user_input.lower()

    breed_info = get_dog_description(breed)
    if not breed_info:
        return 0.0

    breed_size = breed_info.get('Size', '').lower()
    exercise_needs = breed_info.get('Exercise Needs', '').lower()
    penalty = 0.0

    def mentions(*terms):
        return any(term in user_text_lower for term in terms)

    # Apartment living constraints
    if mentions('apartment', 'flat', 'studio', 'small space'):
        if 'giant' in breed_size:
            return -2.0  # Complete elimination
        if 'large' in breed_size:
            # 'very high' contains 'high', so one test covers both levels.
            if 'high' in exercise_needs:
                return -2.0  # Complete elimination
            penalty -= 0.5  # Still significant penalty
        elif 'medium' in breed_size and 'very high' in exercise_needs:
            penalty -= 0.6

    # Exercise mismatch constraints
    if "don't exercise much" in user_text_lower or "low exercise" in user_text_lower:
        if any(term in exercise_needs for term in ['very high', 'extreme', 'intense']):
            return -2.0  # Complete elimination
        if 'high' in exercise_needs:
            penalty -= 0.8

    # Moderate lifestyle: penalize extremes
    if mentions('moderate', 'balanced', '30 minutes', 'half hour'):
        if 'giant' in breed_size:
            penalty -= 0.7  # Strong penalty for giants
        elif 'very high' in exercise_needs:
            penalty -= 0.5

    # Children safety
    if mentions('child', 'kids', 'family', 'baby'):
        good_with_children = breed_info.get('Good with Children', '').lower()
        if good_with_children == 'no':
            return -2.0  # Complete elimination for safety

    return penalty


def get_enhanced_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Enhanced multi-dimensional semantic breed recommendation

    Runs query analysis, constraint filtering, multi-head scoring and score
    calibration, then applies the size-distribution correction and the
    trait-matching pass. Falls back to ``get_semantic_recommendations`` when
    no multi-head scorer is configured or when any stage raises.

    Args:
        user_input: User's natural language description
        top_k: Number of recommendations to return

    Returns:
        List of recommended breeds with enhanced scoring
    """
    try:
        # Stage 1: Query Understanding
        dimensions = self.query_engine.analyze_query(user_input)
        dimension_count = len(
            dimensions.spatial_constraints
            + dimensions.activity_level
            + dimensions.noise_preferences
            + dimensions.size_preferences
            + dimensions.family_context
            + dimensions.maintenance_level
            + dimensions.special_requirements
        )
        print(f"Query dimensions detected: {dimension_count} total dimensions")

        # Stage 2: Apply Constraints
        filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k))
        print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates")

        if not filter_result.passed_breeds:
            error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements."
            print(f"ERROR: {error_msg}")
            # Caught by the handler below, which triggers the fallback path.
            raise ValueError(error_msg)

        # Stage 3: Multi-head Scoring
        if not self.multi_head_scorer:
            print("Multi-head scorer not available, using fallback scoring")
            return self.get_semantic_recommendations(user_input, top_k)

        breed_scores = self.multi_head_scorer.score_breeds(filter_result.passed_breeds, dimensions)
        print(f"Multi-head scoring completed for {len(breed_scores)} breeds")

        # Stage 4: Score Calibration
        breed_score_tuples = [(score.breed_name, score.final_score) for score in breed_scores]
        calibration_result = self.score_calibrator.calibrate_scores(breed_score_tuples)
        print(f"Score calibration: method={calibration_result.calibration_method}")

        # Stage 5: Generate Final Recommendations
        # NOTE(review): the slice assumes score_breeds returns breeds already
        # ordered best-first - confirm against the scorer.
        final_recommendations = []

        for rank, breed_score in enumerate(breed_scores[:top_k], start=1):
            breed_name = breed_score.breed_name
            lookup_key = breed_name.replace(' ', '_')

            # Fall back to the uncalibrated score when no mapping exists
            calibrated_score = calibration_result.score_mapping.get(breed_name, breed_score.final_score)

            # Prefer standardized data, else the plain database description
            standardized_info = get_standardized_breed_data(lookup_key)
            if standardized_info:
                breed_info = self._get_breed_info_from_standardized(standardized_info)
            else:
                breed_info = get_dog_description(lookup_key) or {}

            final_recommendations.append({
                'breed': breed_name,
                'rank': rank,
                'overall_score': calibrated_score,
                'final_score': calibrated_score,
                'semantic_score': breed_score.semantic_component,
                'attribute_score': breed_score.attribute_component,
                'bidirectional_bonus': breed_score.bidirectional_bonus,
                'confidence_score': breed_score.confidence_score,
                'dimensional_breakdown': breed_score.dimensional_breakdown,
                'explanation': breed_score.explanation,
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', ''),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Yes'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', ''),
                'search_type': 'enhanced_description',
                'calibration_method': calibration_result.calibration_method,
                'applied_constraints': filter_result.applied_constraints,
                'relaxed_constraints': filter_result.relaxed_constraints,
                'warnings': filter_result.warnings
            })

        # Apply size distribution correction before returning
        corrected_recommendations = self._apply_size_distribution_correction(final_recommendations)

        # Stage 6: Apply Intelligent Trait Matching Enhancement
        intelligence_enhanced_recommendations = self._apply_intelligent_trait_matching(corrected_recommendations, user_input)

        print(f"Generated {len(intelligence_enhanced_recommendations)} enhanced semantic recommendations with intelligent trait matching")
        return intelligence_enhanced_recommendations

    except Exception as e:
        print(f"Error in enhanced semantic recommendations: {str(e)}")
        print(traceback.format_exc())
        # Fallback to original method
        return self.get_semantic_recommendations(user_input, top_k)


def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]:
    """Adjust recommendation scores using traits the user asked for.

    Keywords extracted from ``user_input`` (intelligence, grooming,
    temperament, experience level, lifespan) are compared with each breed's
    database text. The resulting bonus or penalty is added to
    ``overall_score`` (capped at 1.0), the list is re-sorted and ranks are
    renumbered.

    Args:
        recommendations: Recommendation dicts carrying ``'breed'`` and
            ``'overall_score'``.
        user_input: Free-text description supplied by the user.

    Returns:
        A new list of copied, enhanced dicts sorted by ``overall_score``. The
        original ``recommendations`` object is returned when it is empty or
        when the pass fails.
    """
    import re  # kept function-local, as in the original implementation

    # Nothing to enhance; also avoids dividing by zero in the summary below.
    if not recommendations:
        return recommendations

    temperament_terms = {
        'gentle': ['gentle', 'calm', 'peaceful', 'mild'],
        'playful': ['playful', 'energetic', 'lively', 'fun'],
        'protective': ['protective', 'guard', 'alert', 'watchful'],
        'friendly': ['friendly', 'social', 'outgoing', 'people'],
    }

    try:
        extracted_keywords = self._extract_lifestyle_keywords(user_input)
        enhanced_recommendations = []

        for rec in recommendations:
            breed_info = get_dog_description(rec['breed'].replace(' ', '_')) or {}

            # Treat missing/None text fields as empty so that one incomplete
            # database row cannot abort the whole enhancement pass.
            breed_desc = (breed_info.get('Description') or '').lower()
            breed_grooming = (breed_info.get('Grooming Needs') or '').lower()
            breed_temperament = (breed_info.get('Temperament') or '').lower()

            def desc_has(*words):
                return any(word in breed_desc for word in words)

            intelligence_bonus = 0.0
            trait_match_details = {}

            # 1. Intelligence Matching (only the first detected label is used)
            intelligence_prefs = extracted_keywords.get('intelligence_preference')
            if intelligence_prefs:
                intelligence_pref = intelligence_prefs[0]
                if intelligence_pref == 'high':
                    if desc_has('intelligent', 'smart', 'clever', 'quick to learn', 'trainable'):
                        intelligence_bonus += 0.05
                        trait_match_details['intelligence_match'] = 'High intelligence match detected'
                    elif desc_has('stubborn', 'independent', 'difficult'):
                        intelligence_bonus -= 0.02
                        trait_match_details['intelligence_warning'] = 'May be challenging to train'
                elif intelligence_pref == 'independent':
                    if desc_has('independent', 'stubborn', 'strong-willed'):
                        intelligence_bonus += 0.03
                        trait_match_details['independence_match'] = 'Independent nature match'

            # 2. Grooming Preference Matching
            grooming_prefs = extracted_keywords.get('grooming_preference')
            if grooming_prefs:
                grooming_pref = grooming_prefs[0]
                if grooming_pref == 'low' and 'low' in breed_grooming:
                    intelligence_bonus += 0.03
                    trait_match_details['grooming_match'] = 'Low maintenance grooming match'
                elif grooming_pref == 'high' and 'high' in breed_grooming:
                    intelligence_bonus += 0.03
                    trait_match_details['grooming_match'] = 'High maintenance grooming match'
                elif grooming_pref == 'low' and 'high' in breed_grooming:
                    intelligence_bonus -= 0.04
                    trait_match_details['grooming_mismatch'] = 'High grooming needs may not suit preferences'

            # 3. Temperament Preference Matching (every detected label counts)
            temp_prefs = extracted_keywords.get('temperament_preference')
            if temp_prefs:
                temp_text = breed_temperament + ' ' + breed_desc
                for temp_pref in temp_prefs:
                    words = temperament_terms.get(temp_pref, ())
                    if any(word in temp_text for word in words):
                        intelligence_bonus += 0.04
                        # The last matching label wins the detail slot.
                        trait_match_details['temperament_match'] = (
                            f'{temp_pref.capitalize()} temperament match: {temp_pref}'
                        )

            # 4. Experience Level Matching
            experience_prefs = extracted_keywords.get('experience_level')
            if experience_prefs:
                exp_level = experience_prefs[0]
                if exp_level == 'beginner':
                    # Favor easy-to-handle breeds for beginners
                    if desc_has('easy', 'gentle', 'good for beginners', 'family', 'calm'):
                        intelligence_bonus += 0.06
                        trait_match_details['beginner_friendly'] = 'Good choice for first-time owners'
                    elif desc_has('challenging', 'dominant', 'requires experience', 'strong-willed'):
                        intelligence_bonus -= 0.08
                        trait_match_details['experience_warning'] = 'May be challenging for first-time owners'
                elif exp_level == 'advanced':
                    # Advanced users can handle more challenging breeds
                    if desc_has('working', 'requires experience', 'intelligent', 'strong'):
                        intelligence_bonus += 0.03
                        trait_match_details['advanced_suitable'] = 'Good match for experienced owners'

            # 5. Lifespan Preference Matching
            lifespan_prefs = extracted_keywords.get('lifespan_preference')
            if lifespan_prefs:
                lifespan_pref = lifespan_prefs[0]
                breed_lifespan = breed_info.get('Lifespan', '10-12 years')
                try:
                    years = re.findall(r'\d+', breed_lifespan)
                except TypeError:
                    # Non-text lifespan entry: skip this criterion only.
                    years = []
                if years:
                    avg_years = sum(int(y) for y in years) / len(years)
                    if lifespan_pref == 'long' and avg_years >= 13:
                        intelligence_bonus += 0.02
                        trait_match_details['longevity_match'] = f'Long lifespan match: {breed_lifespan}'
                    elif lifespan_pref == 'healthy' and avg_years >= 12:
                        intelligence_bonus += 0.02
                        trait_match_details['health_match'] = f'Healthy lifespan: {breed_lifespan}'

            enhanced_score = min(1.0, rec['overall_score'] + intelligence_bonus)

            # Work on a copy so the caller's dicts stay untouched
            enhanced_rec = rec.copy()
            enhanced_rec['overall_score'] = enhanced_score
            # Recommendation dicts are built with final_score equal to
            # overall_score; keep that invariant after the adjustment.
            enhanced_rec['final_score'] = enhanced_score
            enhanced_rec['intelligence_bonus'] = intelligence_bonus
            enhanced_rec['trait_match_details'] = trait_match_details

            # Add detailed explanation if significant enhancement occurred
            if abs(intelligence_bonus) > 0.02 and trait_match_details:
                current_explanation = enhanced_rec.get('explanation', '')
                enhanced_rec['explanation'] = (
                    current_explanation
                    + f" Enhanced matching: {'; '.join(trait_match_details.values())}"
                )

            enhanced_recommendations.append(enhanced_rec)

        # Re-sort by enhanced overall score and renumber the ranks
        enhanced_recommendations.sort(key=lambda item: item['overall_score'], reverse=True)
        for rank, rec in enumerate(enhanced_recommendations, start=1):
            rec['rank'] = rank

        average_bonus = (
            sum(r['intelligence_bonus'] for r in enhanced_recommendations)
            / len(enhanced_recommendations)
        )
        print(f"Applied intelligent trait matching with average bonus: {average_bonus:.3f}")

        return enhanced_recommendations

    except Exception as e:
        print(f"Error in intelligent trait matching: {str(e)}")
        # Return original recommendations if trait matching fails
        return recommendations


def get_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Get breed recommendations based on natural language description

    Breeds eliminated by ``_apply_hard_constraints`` are skipped. The rest are
    scored from a normalised semantic similarity, keyword-based feature
    matching, the lifestyle bonus and the comparative bonus, then mapped onto
    display bands between 0.45 and 0.98.

    Args:
        user_input: User's natural language description
        top_k: Number of recommendations to return

    Returns:
        List of recommended breeds, best first. An empty list is returned on
        any failure, including when no SBERT model is available.
    """
    try:
        print(f"Processing user input: {user_input}")

        # Check if model is available - if not, raise error
        if self.sbert_model is None:
            error_msg = "SBERT model not available. This could be due to:\n• Model download failed\n• Insufficient memory\n• Network connectivity issues\n\nPlease check your environment and try again."
            print(f"ERROR: {error_msg}")
            raise RuntimeError(error_msg)

        user_embedding = self.sbert_model.encode(user_input, convert_to_tensor=False)
        comparative_prefs = self._parse_comparative_preferences(user_input)
        lifestyle_keywords = self._extract_lifestyle_keywords(user_input)

        # Pass 1: per-breed raw components, skipping disqualified breeds
        similarities = []
        for breed, breed_vector in self.breed_vectors.items():
            constraint_penalty = self._apply_hard_constraints(breed, user_input, breed_vector.characteristics)
            if constraint_penalty <= -1.0:  # Complete disqualification
                continue

            semantic_score = cosine_similarity(
                [user_embedding],
                [breed_vector.embedding]
            )[0][0]

            lifestyle_bonus = self._calculate_lifestyle_bonus(
                breed_vector.characteristics,
                lifestyle_keywords
            )
            # Fold the (non-disqualifying) constraint penalty into the bonus
            lifestyle_bonus += constraint_penalty

            # The former intermediate "score" (semantic ** 0.8 blend plus
            # jitter) was never read afterwards and is no longer computed.
            similarities.append({
                'breed': breed,
                'semantic_score': semantic_score,
                'comparative_bonus': comparative_prefs.get(breed, 0.0),
                'lifestyle_bonus': lifestyle_bonus
            })

        if not similarities:
            # Avoid taking the mean of an empty list below.
            print("Generated 0 semantic recommendations")
            return []

        # Pass 2: standardized display scores with balanced distribution
        all_semantic_scores = [entry['semantic_score'] for entry in similarities]
        semantic_mean = np.mean(all_semantic_scores)
        semantic_std = np.std(all_semantic_scores) if len(all_semantic_scores) > 1 else 1.0

        # User-side feature flags do not depend on the breed; evaluate once.
        user_text = user_input.lower()
        wants_small_space = any(term in user_text for term in ['apartment', 'small', 'limited space'])
        wants_low_exercise = any(term in user_text for term in ['low exercise', 'minimal exercise', "doesn't need", 'not much'])
        wants_high_exercise = any(term in user_text for term in ['active', 'high exercise', 'running', 'hiking'])
        mentions_children = any(term in user_text for term in ['children', 'kids', 'family'])

        # (threshold, band low, band high, width of the input interval)
        score_bands = (
            (0.9, 0.92, 0.98, 0.1),     # Exceptional matches
            (0.75, 0.85, 0.91, 0.15),   # Excellent matches
            (0.6, 0.75, 0.84, 0.15),    # Good matches
            (0.45, 0.65, 0.74, 0.15),   # Fair matches
            (0.3, 0.55, 0.64, 0.15),    # Poor matches
        )

        breed_display_scores = []
        for entry in similarities:
            breed = entry['breed']
            base_semantic = entry['semantic_score']

            # Normalize semantic score to prevent extreme outliers
            if semantic_std > 0:
                normalized_semantic = (base_semantic - semantic_mean) / semantic_std
                normalized_semantic = max(-2.0, min(2.0, normalized_semantic))  # Cap at 2 standard deviations
                scaled_semantic = 0.5 + (normalized_semantic * 0.1)  # Map to 0.3-0.7 range
            else:
                scaled_semantic = 0.5

            breed_info = (get_dog_description(breed) if breed != 'Unknown' else None) or {}
            breed_size = breed_info.get('Size', '').lower()
            exercise_needs = breed_info.get('Exercise Needs', '').lower()

            # Feature matching (weighted higher than pure semantic similarity)
            feature_score = 0.0

            # Size and space requirements
            if wants_small_space:
                if 'small' in breed_size:
                    feature_score += 0.25
                elif 'medium' in breed_size:
                    feature_score += 0.05
                elif 'large' in breed_size or 'giant' in breed_size:
                    feature_score -= 0.30

            # Exercise requirements; an explicit low-exercise wish wins
            if wants_low_exercise:
                if 'low' in exercise_needs or 'minimal' in exercise_needs:
                    feature_score += 0.20
                elif 'high' in exercise_needs:
                    feature_score -= 0.25
            elif wants_high_exercise:
                if 'high' in exercise_needs:
                    feature_score += 0.20
                elif 'low' in exercise_needs:
                    feature_score -= 0.15

            # Family compatibility
            if mentions_children:
                good_with_children = breed_info.get('Good with Children', '')
                if good_with_children == 'Yes':
                    feature_score += 0.10
                elif good_with_children == 'No':
                    feature_score -= 0.20

            # Combine scores with balanced weights
            base_compatibility = (
                scaled_semantic * 0.35 +
                feature_score * 0.45 +
                entry['lifestyle_bonus'] * 0.15 +
                entry['comparative_bonus'] * 0.05
            )

            # Map the compatibility onto its display band
            for threshold, band_low, band_high, width in score_bands:
                if base_compatibility >= threshold:
                    score_range = (band_low, band_high)
                    position = (base_compatibility - threshold) / width
                    break
            else:  # Very poor matches
                score_range = (0.45, 0.54)
                position = max(0, base_compatibility / 0.3)

            score_span = score_range[1] - score_range[0]
            base_score = score_range[0] + (position * score_span)

            # Small tie-breaking jitter. A private generator seeded with the
            # string itself is reproducible across processes (str hashing is
            # salted per process) and leaves the global random state alone.
            jitter = random.Random(breed + user_input[:15])
            variation = jitter.uniform(-0.015, 0.015)
            display_score = round(max(0.45, min(0.98, base_score + variation)), 3)

            breed_display_scores.append({
                'breed': breed,
                'display_score': display_score,
                'semantic_score': base_semantic,
                'comparative_bonus': entry['comparative_bonus'],
                'lifestyle_bonus': entry['lifestyle_bonus']
            })

        # Sort by display score to ensure ranking consistency
        breed_display_scores.sort(key=lambda item: item['display_score'], reverse=True)

        # Convert to standard recommendation format
        recommendations = []
        for rank, entry in enumerate(breed_display_scores[:top_k], start=1):
            breed = entry['breed']
            display_score = entry['display_score']
            breed_info = get_dog_description(breed) or {}

            recommendations.append({
                'breed': breed.replace('_', ' '),
                'rank': rank,
                'overall_score': display_score,  # Use display score for consistency
                'final_score': display_score,  # Ensure final_score matches overall_score
                'semantic_score': entry['semantic_score'],
                'comparative_bonus': entry['comparative_bonus'],
                'lifestyle_bonus': entry['lifestyle_bonus'],
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', ''),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Yes'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', ''),
                'search_type': 'description'
            })

        print(f"Generated {len(recommendations)} semantic recommendations")
        return recommendations

    except Exception as e:
        print(f"Failed to generate semantic recommendations: {str(e)}")
        print(traceback.format_exc())
        return []


def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any],
                               lifestyle_keywords: Dict[str, List[str]]) -> float:
    """Score how well a breed's characteristics fit the extracted keywords.

    Args:
        breed_characteristics: Dict with the lower-case keys ``'size'``,
            ``'exercise_needs'``, ``'good_with_children'``, ``'temperament'``
            and ``'grooming_needs'``; missing keys are tolerated.
        lifestyle_keywords: Category -> labels mapping in the shape returned
            by ``_extract_lifestyle_keywords``.

    Returns:
        Bonus minus penalties, clamped to the range [-0.3, 0.5].
    """
    bonus = 0.0
    penalties = 0.0

    breed_size = breed_characteristics.get('size', '').lower()
    breed_exercise = breed_characteristics.get('exercise_needs', '').lower()
    temperament = breed_characteristics.get('temperament', '').lower()
    grooming_needs = breed_characteristics.get('grooming_needs', '').lower()
    good_with_children = breed_characteristics.get('good_with_children', 'Yes')

    # Size matching: every stated preference is evaluated
    for pref in lifestyle_keywords.get('size_preference', []):
        if pref in breed_size:
            bonus += 0.25  # Strong reward for size match
        elif (pref == 'small' and 'large' in breed_size) or \
             (pref == 'large' and 'small' in breed_size):
            penalties += 0.15  # Penalty for size mismatch

    # Activity level matching: high takes precedence over low over moderate.
    # 'very high' contains 'high', so a single substring test covers both.
    activity_prefs = lifestyle_keywords.get('activity_level', [])
    if 'high' in activity_prefs:
        if 'high' in breed_exercise:
            bonus += 0.2
        elif 'low' in breed_exercise:
            penalties += 0.2
    elif 'low' in activity_prefs:
        if 'low' in breed_exercise:
            bonus += 0.2
        elif 'high' in breed_exercise:
            penalties += 0.25
    elif 'moderate' in activity_prefs:
        if 'moderate' in breed_exercise:
            bonus += 0.15

    # Family situation matching
    if 'children' in lifestyle_keywords.get('family_situation', []):
        if good_with_children == 'Yes':
            bonus += 0.15
        else:
            penalties += 0.3  # Strong penalty for non-child-friendly breeds

    # Living space matching
    if 'apartment' in lifestyle_keywords.get('living_space', []):
        if 'small' in breed_size:
            bonus += 0.2
        elif 'medium' in breed_size and 'low' in breed_exercise:
            bonus += 0.1
        elif 'large' in breed_size or 'giant' in breed_size:
            penalties += 0.2  # Penalty for large dogs in apartments

    # Noise preference matching: reward quiet temperaments
    if 'low' in lifestyle_keywords.get('noise_preference', []):
        if any(term in temperament for term in ['gentle', 'calm', 'quiet']):
            bonus += 0.1

    # Care level matching
    care_prefs = lifestyle_keywords.get('care_level', [])
    if 'low' in care_prefs and 'low' in grooming_needs:
        bonus += 0.1
    elif 'high' in care_prefs and 'high' in grooming_needs:
        bonus += 0.1
    elif 'low' in care_prefs and 'high' in grooming_needs:
        penalties += 0.15

    # Special needs matching (guard is checked first; companion only when no
    # guard need was stated)
    special_needs = lifestyle_keywords.get('special_needs', [])
    if 'guard' in special_needs:
        if any(term in temperament for term in ['protective', 'alert', 'watchful']):
            bonus += 0.1
    elif 'companion' in special_needs:
        if any(term in temperament for term in ['affectionate', 'gentle', 'loyal']):
            bonus += 0.1

    # Allow a negative result, but keep it within fixed bounds
    return max(-0.3, min(0.5, bonus - penalties))
but limit range + + def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]: + """Convert standardized breed info to dictionary format""" + try: + size_map = {1: 'Tiny', 2: 'Small', 3: 'Medium', 4: 'Large', 5: 'Giant'} + exercise_map = {1: 'Low', 2: 'Moderate', 3: 'High', 4: 'Very High'} + care_map = {1: 'Low', 2: 'Moderate', 3: 'High'} + + return { + 'Size': size_map.get(standardized_info.size_category, 'Medium'), + 'Exercise Needs': exercise_map.get(standardized_info.exercise_level, 'Moderate'), + 'Grooming Needs': care_map.get(standardized_info.care_complexity, 'Moderate'), + 'Good with Children': 'Yes' if standardized_info.child_compatibility >= 0.8 else + 'No' if standardized_info.child_compatibility <= 0.2 else 'Unknown', + 'Temperament': 'Varies by individual', + 'Lifespan': '10-12 years', + 'Description': f'A {size_map.get(standardized_info.size_category, "medium")} sized breed' + } + except Exception as e: + print(f"Error converting standardized info: {str(e)}") + return {} + + def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]: + """Get fallback recommendations when enhanced system fails""" + try: + safe_breeds = [ + ('Labrador Retriever', 0.85), + ('Golden Retriever', 0.82), + ('Cavalier King Charles Spaniel', 0.80), + ('French Bulldog', 0.78), + ('Boston Terrier', 0.76), + ('Bichon Frise', 0.74), + ('Pug', 0.72), + ('Cocker Spaniel', 0.70) + ] + + recommendations = [] + for i, (breed, score) in enumerate(safe_breeds[:top_k]): + breed_info = get_dog_description(breed.replace(' ', '_')) or {} + + recommendation = { + 'breed': breed, + 'rank': i + 1, + 'overall_score': score, + 'final_score': score, + 'semantic_score': score * 0.8, + 'comparative_bonus': 0.0, + 'lifestyle_bonus': 0.0, + 'size': breed_info.get('Size', 'Unknown'), + 'temperament': breed_info.get('Temperament', ''), + 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'), + 'grooming_needs': breed_info.get('Grooming Needs', 
def get_enhanced_recommendations_with_unified_scoring(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """Simplified enhanced-recommendation entry point.

    Delegates to ``self.get_semantic_recommendations`` (basic semantic
    matching) and converts any failure into a ``RuntimeError``.

    Args:
        user_input: Free-text description supplied by the user.
        top_k: Maximum number of recommendations to return.

    Returns:
        Whatever ``get_semantic_recommendations`` returns.

    Raises:
        RuntimeError: If anything in the call raises; the original
            exception is chained as ``__cause__``.
    """
    try:
        print(f"Processing enhanced recommendation: {user_input[:50]}...")

        # Basic semantic matching does all the work.
        results = self.get_semantic_recommendations(user_input, top_k)
        return results

    except Exception as exc:
        error_msg = f"Enhanced recommendation error: {str(exc)}. Please check your description."
        print(f"ERROR: {error_msg}")
        print(traceback.format_exc())
        raise RuntimeError(error_msg) from exc
analysis['constraint_requirements'].append('child_friendly') + + # 提取用戶背景 + analysis['user_context'] = { + 'has_children': any(word in text for word in ['children', 'kids', '小孩']), + 'living_space': 'apartment' if any(word in text for word in ['apartment', '公寓']) else 'house', + 'activity_level': 'high' if any(word in text for word in ['active', 'energetic', '活躍']) else 'moderate', + 'noise_sensitive': any(word in text for word in ['quiet', 'silent', '安靜']), + 'experience_level': 'beginner' if any(word in text for word in ['first time', 'beginner', '新手']) else 'intermediate' + } + + return analysis + + def _create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> UserPreferences: + """從分析結果創建用戶偏好物件""" + context = analysis['user_context'] + + # 推斷居住空間類型 + living_space = 'apartment' if context.get('living_space') == 'apartment' else 'house_small' + + # 推斷院子權限 + yard_access = 'no_yard' if living_space == 'apartment' else 'shared_yard' + + # 推斷運動時間 + activity_level = context.get('activity_level', 'moderate') + exercise_time_map = {'high': 120, 'moderate': 60, 'low': 30} + exercise_time = exercise_time_map.get(activity_level, 60) + + # 推斷運動類型 + exercise_type_map = {'high': 'active_training', 'moderate': 'moderate_activity', 'low': 'light_walks'} + exercise_type = exercise_type_map.get(activity_level, 'moderate_activity') + + # 推斷噪音容忍度 + noise_tolerance = 'low' if context.get('noise_sensitive', False) else 'medium' + + return UserPreferences( + living_space=living_space, + yard_access=yard_access, + exercise_time=exercise_time, + exercise_type=exercise_type, + grooming_commitment='medium', + experience_level=context.get('experience_level', 'intermediate'), + time_availability='moderate', + has_children=context.get('has_children', False), + children_age='school_age' if context.get('has_children', False) else None, + noise_tolerance=noise_tolerance, + space_for_play=(living_space != 'apartment'), + other_pets=False, + climate='moderate', + 
health_sensitivity='medium', + barking_acceptance=noise_tolerance, + size_preference='no_preference' + ) + + def _get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]: + """獲取候選品種列表""" + candidate_breeds = set() + + # 如果提及特定品種,優先包含 + if analysis['mentioned_breeds']: + candidate_breeds.update(analysis['mentioned_breeds']) + + # 根據約束要求過濾品種 + if 'apartment_suitable' in analysis['constraint_requirements']: + apartment_suitable = [ + 'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier', + 'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier', 'Shih_Tzu' + ] + candidate_breeds.update(breed for breed in apartment_suitable if breed in self.breed_list) + + if 'child_friendly' in analysis['constraint_requirements']: + child_friendly = [ + 'Labrador_Retriever', 'Golden_Retriever', 'Beagle', 'Cavalier_King_Charles_Spaniel', + 'Bichon_Frise', 'Poodle', 'Cocker_Spaniel' + ] + candidate_breeds.update(breed for breed in child_friendly if breed in self.breed_list) + + # 如果候選品種不足,添加更多通用品種 + if len(candidate_breeds) < 20: + general_breeds = [ + 'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', 'French_Bulldog', + 'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Boston_Terrier', + 'Border_Collie', 'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer', + 'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua' + ] + candidate_breeds.update(breed for breed in general_breeds if breed in self.breed_list) + + return list(candidate_breeds)[:30] # 限制候選數量以提高效率 + + def _apply_constraint_filtering_enhanced(self, breed: str, analysis: Dict[str, Any]) -> float: + """應用約束過濾,返回調整分數""" + penalty = 0.0 + + breed_info = get_dog_description(breed) + if not breed_info: + return penalty + + # 低噪音要求 + if 'low_noise' in analysis['constraint_requirements']: + noise_info = breed_noise_info.get(breed, {}) + noise_level = noise_info.get('noise_level', 'moderate').lower() + if 'high' in noise_level: + penalty -= 0.3 # 嚴重扣分 + 
def _get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]:
    """Collect the characteristics of a breed into one flat dictionary.

    Args:
        breed: Breed identifier passed to ``get_dog_description`` and used
            as the key into ``breed_noise_info``.

    Returns:
        A dictionary with the keys ``size``, ``temperament``,
        ``exercise_needs``, ``grooming_needs``, ``good_with_children``,
        ``lifespan``, ``description`` and ``noise_level``; an empty
        dictionary when no description is available for the breed.
    """
    description_record = get_dog_description(breed)
    if not description_record:
        return {}

    # (output key, source key, default when the source key is missing)
    field_spec = (
        ('size', 'Size', 'Unknown'),
        ('temperament', 'Temperament', ''),
        ('exercise_needs', 'Exercise Needs', 'Moderate'),
        ('grooming_needs', 'Grooming Needs', 'Moderate'),
        ('good_with_children', 'Good with Children', 'Unknown'),
        ('lifespan', 'Lifespan', '10-12 years'),
        ('description', 'Description', ''),
    )
    result = {
        out_key: description_record.get(src_key, default)
        for out_key, src_key, default in field_spec
    }

    # Noise information lives in a separate lookup table.
    result['noise_level'] = breed_noise_info.get(breed, {}).get('noise_level', 'moderate')

    return result
self.get_semantic_recommendations(user_description, top_k * 2) + + if not user_preferences: + return semantic_recommendations[:top_k] + + # Combine with traditional scoring + hybrid_results = [] + + for semantic_rec in semantic_recommendations: + breed_name = semantic_rec['breed'].replace(' ', '_') + + # Calculate traditional compatibility score + traditional_score = calculate_compatibility_score(user_preferences, breed_name) + + # Hybrid score (semantic 40% + traditional 60%) + hybrid_score = ( + semantic_rec['overall_score'] * 0.4 + + traditional_score * 0.6 + ) + + semantic_rec['hybrid_score'] = hybrid_score + semantic_rec['traditional_score'] = traditional_score + hybrid_results.append(semantic_rec) + + # Re-sort by hybrid score + hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True) + + # Update rankings + for i, result in enumerate(hybrid_results[:top_k]): + result['rank'] = i + 1 + result['overall_score'] = result['hybrid_score'] + + return hybrid_results[:top_k] + + except Exception as e: + print(f"Hybrid recommendation failed: {str(e)}") + print(traceback.format_exc()) + return self.get_semantic_recommendations(user_description, top_k) + +def get_breed_recommendations_by_description(user_description: str, + user_preferences: Optional[Any] = None, + top_k: int = 15) -> List[Dict[str, Any]]: + """Main interface function for getting breed recommendations by description""" + try: + print("Initializing Enhanced SemanticBreedRecommender...") + recommender = SemanticBreedRecommender() + + # 優先使用整合統一評分系統的增強推薦 + print("Using enhanced recommendation system with unified scoring") + results = recommender.get_enhanced_recommendations_with_unified_scoring(user_description, top_k) + + if results and len(results) > 0: + print(f"Generated {len(results)} enhanced recommendations successfully") + return results + else: + # 如果增強系統無結果,嘗試原有增強系統 + print("Enhanced unified system returned no results, trying original enhanced system") + results = 
recommender.get_enhanced_semantic_recommendations(user_description, top_k) + + if results and len(results) > 0: + return results + else: + # 最後回退到標準系統 + print("All enhanced systems failed, using standard system") + if user_preferences: + results = recommender.get_hybrid_recommendations(user_description, user_preferences, top_k) + else: + results = recommender.get_semantic_recommendations(user_description, top_k) + + if not results: + error_msg = f"All recommendation systems failed to generate results. Please check your input description and try again. Error details may be in the console." + print(f"ERROR: {error_msg}") + raise RuntimeError(error_msg) + return results + + except Exception as e: + error_msg = f"Critical error in recommendation system: {str(e)}. Please check your input and system configuration." + print(f"ERROR: {error_msg}") + print(traceback.format_exc()) + raise RuntimeError(error_msg) from e + + +def get_enhanced_recommendations_with_unified_scoring(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]: + """簡化版本:基本語意推薦功能""" + try: + print(f"Processing description-based recommendation: {user_description[:50]}...") + + # 創建基本推薦器實例 + recommender = SemanticBreedRecommender() + + if not recommender.sbert_model: + print("SBERT model not available, using basic text matching...") + # 使用基本文字匹配邏輯 + return _get_basic_text_matching_recommendations(user_description, top_k) + + # 使用語意相似度推薦 + recommendations = [] + user_embedding = recommender.sbert_model.encode(user_description) + + # 計算所有品種的增強分數 + all_breed_scores = [] + for breed_name, breed_vector in recommender.breed_vectors.items(): + breed_embedding = breed_vector.embedding + similarity = cosine_similarity([user_embedding], [breed_embedding])[0][0] + + # 獲取品種資料 + breed_info = get_dog_description(breed_name) or {} + + # 計算增強的匹配分數 + enhanced_score = _calculate_enhanced_matching_score( + breed_name, breed_info, user_description, similarity + ) + + all_breed_scores.append((breed_name, 
enhanced_score, breed_info, similarity)) + + # 按 final_score 排序(而不是語意相似度) + all_breed_scores.sort(key=lambda x: x[1]['final_score'], reverse=True) + top_breeds = all_breed_scores[:top_k] + + for i, (breed, enhanced_score, breed_info, similarity) in enumerate(top_breeds): + recommendation = { + 'breed': breed.replace('_', ' '), + 'rank': i + 1, # 正確的排名 + 'overall_score': enhanced_score['final_score'], + 'final_score': enhanced_score['final_score'], + 'semantic_score': similarity, + 'comparative_bonus': enhanced_score['lifestyle_bonus'], + 'lifestyle_bonus': enhanced_score['lifestyle_bonus'], + 'size': breed_info.get('Size', 'Unknown'), + 'temperament': breed_info.get('Temperament', 'Unknown'), + 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'), + 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'), + 'good_with_children': breed_info.get('Good with Children', 'Unknown'), + 'lifespan': breed_info.get('Lifespan', '10-12 years'), + 'description': breed_info.get('Description', 'No description available'), + 'search_type': 'description', + 'scores': enhanced_score['dimension_scores'] + } + recommendations.append(recommendation) + + print(f"Generated {len(recommendations)} semantic recommendations") + return recommendations + + except Exception as e: + error_msg = f"Error in semantic recommendation system: {str(e)}. Please check your input and try again." 
def _calculate_enhanced_matching_score(breed: str, breed_info: dict, user_description: str, base_similarity: float) -> dict:
    """Score how well a breed fits a free-text user description.

    The user's text is parsed into space, exercise, noise, size and family
    requirements; each is compared with the breed's attributes, the
    per-dimension scores are combined with fixed weights, blended with the
    semantic similarity, adjusted by the hard-constraint penalty and finally
    clamped to a guaranteed floor and a 0.93 ceiling.

    Args:
        breed: Breed identifier (also the key into ``breed_noise_info``).
        breed_info: Breed attribute dictionary ('Size', 'Exercise Needs',
            'Temperament', 'Good with Children', ...).
        user_description: Raw user text; lower-cased internally.
        base_similarity: Semantic (or keyword) similarity used as the
            baseline score.

    Returns:
        Dictionary with ``final_score``, ``weighted_score``,
        ``lifestyle_bonus``, ``dimension_scores`` and ``constraint_penalty``.
        If anything raises, a fallback dictionary derived purely from
        ``base_similarity`` is returned (the error is printed).
    """
    try:
        user_desc = user_description.lower()

        # Requirements parsed from the user's text
        space_requirements = _analyze_space_requirements(user_desc)
        exercise_requirements = _analyze_exercise_requirements(user_desc)
        noise_requirements = _analyze_noise_requirements(user_desc)
        size_requirements = _analyze_size_requirements(user_desc)
        family_requirements = _analyze_family_requirements(user_desc)

        # Breed attributes
        breed_size = breed_info.get('Size', '').lower()
        breed_exercise = breed_info.get('Exercise Needs', '').lower()
        breed_noise = breed_noise_info.get(breed, {}).get('noise_level', 'moderate').lower()
        breed_temperament = breed_info.get('Temperament', '').lower()
        breed_good_with_children = breed_info.get('Good with Children', '').lower()

        # Per-dimension compatibility scores
        dimension_scores = {}

        space_score = _calculate_space_compatibility(space_requirements, breed_size, breed_exercise)
        dimension_scores['space'] = space_score

        exercise_score = _calculate_exercise_compatibility(exercise_requirements, breed_exercise)
        dimension_scores['exercise'] = exercise_score

        noise_score = _calculate_noise_compatibility(noise_requirements, breed_noise)
        dimension_scores['noise'] = noise_score

        # Size feeds the weighted sum but is not reported as its own
        # dimension; grooming and experience are proxies derived from the
        # semantic similarity.
        size_score = _calculate_size_compatibility(size_requirements, breed_size)
        grooming_score = min(0.9, base_similarity + 0.1)
        dimension_scores['grooming'] = grooming_score

        family_score = _calculate_family_compatibility(family_requirements, breed_good_with_children, breed_temperament)
        dimension_scores['family'] = family_score
        dimension_scores['experience'] = min(0.9, base_similarity + 0.05)

        # Hard-constraint adjustment (negative values are penalties)
        constraint_penalty = _apply_hard_constraints_enhanced(user_desc, breed_info)

        # Weighted total; the weights sum to 1.0
        weighted_score = (
            space_score * 0.30 +      # space compatibility
            exercise_score * 0.28 +   # exercise match
            noise_score * 0.18 +      # noise control
            family_score * 0.12 +     # family compatibility
            size_score * 0.08 +       # size match
            grooming_score * 0.04     # care/grooming proxy
        )

        # Tiered bonus for breeds that match well on the key dimensions
        perfect_match_bonus = 0.0
        if space_score >= 0.88 and exercise_score >= 0.88 and noise_score >= 0.85:
            perfect_match_bonus = 0.08  # outstanding match
        elif space_score >= 0.82 and exercise_score >= 0.82 and noise_score >= 0.75:
            perfect_match_bonus = 0.04  # excellent match
        elif space_score >= 0.75 and exercise_score >= 0.75:
            perfect_match_bonus = 0.02  # good match

        # 75% dimension matching + 25% semantic similarity, plus the bonus
        base_combined_score = (weighted_score * 0.75 + base_similarity * 0.25) + perfect_match_bonus

        raw_final_score = base_combined_score + constraint_penalty

        # Guaranteed floor; a few highly adaptable breeds get a higher one.
        base_guaranteed_score = 0.42
        high_adaptability_breeds = ('French_Bulldog', 'Pug', 'Golden_Retriever', 'Labrador_Retriever')
        # Bug fix: the previous `any(breed in breed for breed in ...)` reused
        # the name `breed` inside the generator, so each list entry was
        # compared with itself and the check was always True.
        breed_key = breed.replace(' ', '_')
        if any(name in breed_key for name in high_adaptability_breeds):
            base_guaranteed_score = 0.45

        if raw_final_score >= base_guaranteed_score:
            # Mildly compress very high scores to avoid clustering at the top
            if raw_final_score > 0.85:
                compression_factor = 0.92
                final_score = 0.85 + (raw_final_score - 0.85) * compression_factor
            else:
                final_score = raw_final_score
            final_score = min(0.93, final_score)
        else:
            # Blend 75% floor + 25% raw score, then enforce the floor.
            # NOTE(review): because the raw score is below the floor here,
            # the blend is too, so this branch always yields exactly the
            # floor — confirm whether some spread was intended.
            normalized_raw_score = max(0.15, raw_final_score)
            final_score = base_guaranteed_score * 0.75 + normalized_raw_score * 0.25
            final_score = max(base_guaranteed_score, min(0.93, final_score))

        lifestyle_bonus = max(0.0, weighted_score - base_similarity)

        return {
            'final_score': final_score,
            'weighted_score': weighted_score,
            'lifestyle_bonus': lifestyle_bonus,
            'dimension_scores': dimension_scores,
            'constraint_penalty': constraint_penalty
        }

    except Exception as e:
        print(f"Error in enhanced matching calculation for {breed}: {str(e)}")
        # Fall back to scores derived only from the base similarity
        return {
            'final_score': base_similarity,
            'weighted_score': base_similarity,
            'lifestyle_bonus': 0.0,
            'dimension_scores': {
                'space': base_similarity * 0.9,
                'exercise': base_similarity * 0.85,
                'grooming': base_similarity * 0.8,
                'experience': base_similarity * 0.75,
                'noise': base_similarity * 0.7,
                'family': base_similarity * 0.65
            },
            'constraint_penalty': 0.0
        }
user_desc: + requirements['level'] = 'moderate' + requirements['importance'] = 0.85 # 中等活動量的特殊標記 + # 高運動量識別 + elif any(word in user_desc for word in ['active', 'hiking', 'outdoor activities', 'running', 'outdoors', 'love hiking']): + requirements['level'] = 'high' + requirements['importance'] = 0.9 + + return requirements + +def _analyze_noise_requirements(user_desc: str) -> dict: + """分析噪音需求""" + requirements = {'tolerance': 'medium', 'importance': 0.5} + + if any(word in user_desc for word in ['quiet', 'no bark', "won't bark", "doesn't bark", 'silent', 'peaceful']): + requirements['tolerance'] = 'low' + requirements['importance'] = 0.9 + elif any(word in user_desc for word in ['loud', 'barking ok', 'noise ok']): + requirements['tolerance'] = 'high' + requirements['importance'] = 0.7 + + return requirements + +def _analyze_size_requirements(user_desc: str) -> dict: + """分析體型需求""" + requirements = {'preferred': 'any', 'importance': 0.5} + + if any(word in user_desc for word in ['small', 'tiny', 'little', 'lap dog', 'compact']): + requirements['preferred'] = 'small' + requirements['importance'] = 0.8 + elif any(word in user_desc for word in ['large', 'big', 'giant']): + requirements['preferred'] = 'large' + requirements['importance'] = 0.8 + + return requirements + +def _analyze_family_requirements(user_desc: str) -> dict: + """分析家庭需求""" + requirements = {'children': False, 'importance': 0.3} + + if any(word in user_desc for word in ['children', 'kids', 'family', 'child']): + requirements['children'] = True + requirements['importance'] = 0.8 + + return requirements + +def _calculate_space_compatibility(space_req: dict, breed_size: str, breed_exercise: str) -> float: + """計算空間相容性分數 - 增強中等活動量處理""" + if space_req['type'] == 'apartment': + if 'small' in breed_size or 'toy' in breed_size: + base_score = 0.95 + elif 'medium' in breed_size: + if 'low' in breed_exercise: + base_score = 0.75 + else: + base_score = 0.45 # 降低中型犬在公寓的分數 + elif 'large' in breed_size: + base_score 
= 0.05 # 大型犬極度不適合公寓 + elif 'giant' in breed_size: + base_score = 0.01 # 超大型犬完全不適合公寓 + else: + base_score = 0.7 + elif space_req['type'] == 'medium_house': + # 中型房屋的特殊處理 - 適合中等活動量用戶 + if 'small' in breed_size or 'toy' in breed_size: + base_score = 0.9 + elif 'medium' in breed_size: + base_score = 0.95 # 中型犬在中型房屋很適合 + elif 'large' in breed_size: + if 'moderate' in breed_exercise or 'low' in breed_exercise: + base_score = 0.8 # 低運動量大型犬還可以 + else: + base_score = 0.6 # 高運動量大型犬不太適合 + elif 'giant' in breed_size: + base_score = 0.3 # 超大型犬在中型房屋不太適合 + else: + base_score = 0.85 + else: + # 大型房屋的情況 + if 'small' in breed_size or 'toy' in breed_size: + base_score = 0.85 + elif 'medium' in breed_size: + base_score = 0.9 + elif 'large' in breed_size or 'giant' in breed_size: + base_score = 0.95 + else: + base_score = 0.8 + + return min(0.95, base_score) + +def _calculate_exercise_compatibility(exercise_req: dict, breed_exercise: str) -> float: + """計算運動需求相容性分數 - 增強中等活動量處理""" + if exercise_req['level'] == 'low': + if 'low' in breed_exercise or 'minimal' in breed_exercise: + return 0.95 + elif 'moderate' in breed_exercise: + return 0.5 # 降低不匹配分數 + elif 'high' in breed_exercise: + return 0.1 # 進一步降低高運動需求的匹配 + else: + return 0.7 + elif exercise_req['level'] == 'high': + if 'high' in breed_exercise: + return 0.95 + elif 'moderate' in breed_exercise: + return 0.8 + elif 'low' in breed_exercise: + return 0.6 + else: + return 0.7 + else: # moderate - 中等活動量的精確處理 + if 'moderate' in breed_exercise: + return 0.95 # 完美匹配 + elif 'low' in breed_exercise: + return 0.85 # 低運動需求的品種對中等活動量用戶也不錯 + elif 'high' in breed_exercise: + return 0.5 # 中等活動量用戶不太適合高運動需求品種 + else: + return 0.75 + + return 0.6 + +def _calculate_noise_compatibility(noise_req: dict, breed_noise: str) -> float: + """計算噪音相容性分數,更好處理複合等級""" + breed_noise_lower = breed_noise.lower() + + if noise_req['tolerance'] == 'low': + if 'low' in breed_noise_lower and 'moderate' not in breed_noise_lower: + return 0.95 # 純低噪音 + elif 'low-moderate' 
in breed_noise_lower or 'low to moderate' in breed_noise_lower: + return 0.8 # 低到中等噪音,還可接受 + elif breed_noise_lower in ['moderate']: + return 0.4 # 中等噪音有些問題 + elif 'high' in breed_noise_lower: + return 0.1 # 高噪音不適合 + else: + return 0.6 # 未知噪音水平,保守估計 + elif noise_req['tolerance'] == 'high': + if 'high' in breed_noise_lower: + return 0.9 + elif 'moderate' in breed_noise_lower: + return 0.85 + elif 'low' in breed_noise_lower: + return 0.8 # 安靜犬對高容忍度的人也很好 + else: + return 0.8 + else: # moderate tolerance + if 'moderate' in breed_noise_lower: + return 0.9 + elif 'low' in breed_noise_lower: + return 0.85 + elif 'high' in breed_noise_lower: + return 0.6 + else: + return 0.75 + + return 0.7 + +def _calculate_size_compatibility(size_req: dict, breed_size: str) -> float: + """計算體型相容性分數""" + if size_req['preferred'] == 'small': + if any(word in breed_size for word in ['small', 'toy', 'tiny']): + return 0.9 + elif 'medium' in breed_size: + return 0.6 + else: + return 0.3 + elif size_req['preferred'] == 'large': + if any(word in breed_size for word in ['large', 'giant']): + return 0.9 + elif 'medium' in breed_size: + return 0.7 + else: + return 0.4 + + return 0.7 # 無特別偏好 + +def _calculate_family_compatibility(family_req: dict, good_with_children: str, temperament: str) -> float: + """計算家庭相容性分數""" + if family_req['children']: + if 'yes' in good_with_children.lower(): + return 0.9 + elif any(word in temperament for word in ['gentle', 'patient', 'friendly']): + return 0.8 + elif 'no' in good_with_children.lower(): + return 0.2 + else: + return 0.6 + + return 0.7 + +def _apply_hard_constraints_enhanced(user_desc: str, breed_info: dict) -> float: + """應用品種特性感知的動態懲罰機制""" + penalty = 0.0 + + # 建立懲罰衰減係數和補償機制 + penalty_decay_factor = 0.7 + breed_adaptability_bonus = 0.0 + breed_size = breed_info.get('Size', '').lower() + breed_exercise = breed_info.get('Exercise Needs', '').lower() + breed_name = breed_info.get('Breed', '').replace(' ', '_') + + # 公寓空間約束 - 品種特性感知懲罰機制 + if 'apartment' in 
user_desc or 'small apartment' in user_desc: + if 'giant' in breed_size: + base_penalty = -0.35 # 減少基礎懲罰 + # 特定品種適應性補償 + adaptable_giants = ['Mastiff', 'Great Dane'] # 相對安靜的巨型犬 + if any(adapt_breed in breed_name for adapt_breed in adaptable_giants): + breed_adaptability_bonus += 0.08 + penalty += base_penalty * penalty_decay_factor + elif 'large' in breed_size: + base_penalty = -0.25 # 減少大型犬懲罰 + # 適合公寓的大型犬補償 + apartment_friendly_large = ['Greyhound', 'Great_Dane'] + if any(apt_breed in breed_name for apt_breed in apartment_friendly_large): + breed_adaptability_bonus += 0.06 + penalty += base_penalty * penalty_decay_factor + elif 'medium' in breed_size and 'high' in breed_exercise: + penalty += -0.15 * penalty_decay_factor # 進一步減少懲罰 + + # 運動需求不匹配 - 品種特性感知懲罰機制 + if any(phrase in user_desc for phrase in ["don't exercise", "not active", "low exercise", "don't exercise much"]): + if 'high' in breed_exercise: + base_penalty = -0.28 # 減少基礎懲罰 + # 低維護高運動犬種補償 + adaptable_high_energy = ['Greyhound', 'Whippet'] # 運動爆發型,平時安靜 + if any(adapt_breed in breed_name for adapt_breed in adaptable_high_energy): + breed_adaptability_bonus += 0.10 + penalty += base_penalty * penalty_decay_factor + elif 'moderate' in breed_exercise: + penalty += -0.08 * penalty_decay_factor # 進一步減少懲罰 + + # 噪音控制需求不匹配 - 品種特性感知懲罰機制 + if any(phrase in user_desc for phrase in ['quiet', "won't bark", "doesn't bark", "silent"]): + breed_noise = breed_noise_info.get(breed_name, {}).get('noise_level', 'moderate').lower() + if 'high' in breed_noise: + base_penalty = -0.18 # 減少基礎懲罰 + # 訓練性良好的高噪音品種補償 + trainable_vocal_breeds = ['German_Shepherd', 'Golden_Retriever'] + if any(train_breed in breed_name for train_breed in trainable_vocal_breeds): + breed_adaptability_bonus += 0.05 + penalty += base_penalty * penalty_decay_factor + elif 'moderate' in breed_noise and 'low' not in breed_noise: + penalty += -0.05 * penalty_decay_factor + + # 體型偏好不匹配 - 漸進式懲罰 + if any(phrase in user_desc for phrase in ['small', 'tiny', 
def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """Recommend breeds by keyword matching (fallback when SBERT is unavailable).

    The description is split into lowercase keywords, and each candidate
    breed is scored by the fraction of keywords found in its name,
    temperament, size and description text. That base score is refined by
    ``_calculate_enhanced_matching_score`` and the top results are returned.

    Args:
        user_description: Free-text description of what the user wants.
        top_k: Maximum number of recommendations to return.

    Returns:
        Recommendation dicts sorted by descending final score, each with a
        1-based ``'rank'``.

    Raises:
        RuntimeError: If any step fails; the original exception is chained.
    """
    try:
        print("Using basic text matching as fallback...")

        keywords = user_description.lower().split()

        # Used when the database is unavailable or holds no breeds.
        fallback_breeds = [
            'Labrador_Retriever', 'Golden_Retriever', 'German_Shepherd', 'French_Bulldog',
            'Border_Collie', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
            'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih-Tzu',
            'Maltese_Dog', 'Chihuahua', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
            'Japanese_Spaniel', 'Toy_Terrier', 'Affenpinscher', 'Pekingese', 'Lhasa'
        ]

        # Load the candidate breed list from the database.
        try:
            conn = sqlite3.connect('animal_detector.db')
            try:
                cursor = conn.cursor()
                cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog LIMIT 50")
                basic_breeds = [row[0] for row in cursor.fetchall() if row[0]]
                cursor.close()
            finally:
                # Close the connection even when the query fails.
                conn.close()
        except Exception as e:
            print(f"Could not load breed list from database: {str(e)}")
            basic_breeds = fallback_breeds

        if not basic_breeds:
            # An empty table would otherwise yield no recommendations at all.
            basic_breeds = fallback_breeds

        breed_scores = []
        for breed in basic_breeds:
            breed_info = get_dog_description(breed) or {}
            breed_text = f"{breed} {breed_info.get('Temperament', '')} {breed_info.get('Size', '')} {breed_info.get('Description', '')}".lower()

            # Fraction of keywords found in the breed text; guard against an
            # empty description to avoid dividing by zero.
            matches = sum(1 for keyword in keywords if keyword in breed_text)
            match_ratio = matches / len(keywords) if keywords else 0.0
            base_score = min(0.95, 0.3 + match_ratio * 0.6)

            # Refine the base score with the enhanced matching logic.
            enhanced_score = _calculate_enhanced_matching_score(
                breed, breed_info, user_description, base_score
            )

            breed_scores.append((breed, enhanced_score['final_score'], breed_info, enhanced_score))

        # Highest score first.
        breed_scores.sort(key=lambda item: item[1], reverse=True)

        recommendations = []
        for rank, (breed, final_score, breed_info, enhanced_score) in enumerate(breed_scores[:top_k], start=1):
            recommendations.append({
                'breed': breed.replace('_', ' '),
                'rank': rank,
                'overall_score': final_score,
                'final_score': final_score,
                'semantic_score': enhanced_score.get('weighted_score', final_score),
                'comparative_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'lifestyle_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', 'Unknown'),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Unknown'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', 'No description available'),
                'search_type': 'description',
                # Per-dimension scores, or scaled estimates when none exist.
                'scores': enhanced_score.get('dimension_scores', {
                    'space': final_score * 0.9,
                    'exercise': final_score * 0.85,
                    'grooming': final_score * 0.8,
                    'experience': final_score * 0.75,
                    'noise': final_score * 0.7,
                    'family': final_score * 0.65
                })
            })

        return recommendations

    except Exception as e:
        error_msg = f"Error in basic text matching: {str(e)}"
        print(f"ERROR: {error_msg}")
        raise RuntimeError(error_msg) from e
border-radius: 8px; + background: rgba(200, 200, 200, 0.1); + transition: opacity 0.3s ease; + } + + /* Natural Language Search Button Styles */ + button#find-by-description-btn { + background: linear-gradient(90deg, #4299e1 0%, #48bb78 100%) !important; + border: none !important; + border-radius: 30px !important; + padding: 12px 24px !important; + color: white !important; + font-weight: bold !important; + cursor: pointer !important; + transition: all 0.3s ease !important; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important; + width: 100% !important; + margin: 20px 0 !important; + font-size: 1.1em !important; + } + button#find-by-description-btn:hover { + background: linear-gradient(90deg, #3182ce 0%, #38a169 100%) !important; + box-shadow: 0 6px 12px rgba(0, 0, 0, 0.2) !important; + transform: translateY(-2px) !important; + } + button#find-by-description-btn:active { + background: linear-gradient(90deg, #2c5aa0 0%, #2f7d32 100%) !important; + transform: translateY(0px) scale(0.98) !important; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2) !important; + } + + /* Description Input Styles */ + .description-input textarea { + border-radius: 10px !important; + border: 2px solid #e2e8f0 !important; + transition: all 0.3s ease !important; + } + .description-input textarea:focus { + border-color: #4299e1 !important; + box-shadow: 0 0 0 3px rgba(66, 153, 225, 0.1) !important; + } + + /* Force override any other styles */ + .gradio-button { + position: relative !important; + overflow: visible !important; + } + + /* Progress bars for semantic recommendations */ + .progress { + transition: all 0.3s ease-in-out; + border-radius: 4px; + height: 12px; + } + .progress-bar { + background-color: #f5f5f5; + border-radius: 4px; + overflow: hidden; + position: relative; + } + .score-item { + margin: 10px 0; + } + .percentage { + margin-left: 8px; + font-weight: 500; + } + + /* History display with colored tags */ + .history-tag-criteria { + background: rgba(72, 187, 120, 0.1); + color: 
#48bb78; + padding: 4px 8px; + border-radius: 12px; + font-size: 0.8em; + font-weight: 600; + display: inline-flex; + align-items: center; + gap: 4px; + } + .history-tag-description { + background: rgba(66, 153, 225, 0.1); + color: #4299e1; + padding: 4px 8px; + border-radius: 12px; + font-size: 0.8em; + font-weight: 600; + display: inline-flex; + align-items: center; + gap: 4px; + } + .dog-info-card { margin: 0 0 20px 0; padding: 0; @@ -234,7 +357,7 @@ def get_css_styles(): } .breed-name { - font-size: 1.2em !important; # 從 1.5em 改為 1.2em + font-size: 1.2em !important; font-weight: bold; color: #2c3e50; flex-grow: 1; @@ -1070,52 +1193,24 @@ def get_css_styles(): } @media (max-width: 768px) { - .info-cards { - grid-template-columns: 1fr !important; /* 在手機上改為單列 */ - gap: 12px !important; - padding: 10px !important; - width: 100% !important; - box-sizing: border-box !important; - min-height: auto !important; /* 在手機上移除最小高度限制 */ - height: auto !important; /* 允許高度自適應 */ - padding: 12px !important; /* 稍微減少填充 */ - } - - .info-card { - width: 100% !important; - margin: 0 !important; - padding: 12px !important; - min-height: auto !important; /* 移除最小高度限制 */ - height: auto !important; /* 允許高度自適應 */ - overflow: visible !important; /* 確保內容不被切斷 */ + /* 在小螢幕上改為單列顯示 */ + .health-grid, .noise-grid { + grid-template-columns: 1fr; } - .info-card .tooltip { - flex-wrap: wrap !important; /* 在手機版允許換行 */ - } - .info-card span { - display: block !important; /* 確保文字完整顯示 */ - overflow: visible !important; - } - - .tooltip { - width: 100% !important; - display: flex !important; - align-items: center !important; - gap: 8px !important; + /* 減少內邊距 */ + .health-section, .noise-section { + padding: 16px; } - - .tooltip-text { - left: auto !important; - right: 0 !important; - width: 200px !important; + + /* 調整字體大小 */ + .section-header { + font-size: 1rem; } - - /* 確保所有文字可見 */ - .label, .value { - overflow: visible !important; - white-space: normal !important; - word-wrap: break-word !important; + 
+ /* 調整項目內邊距 */ + .health-item, .noise-item { + padding: 10px 14px; } }