Spaces:
Running
on
Zero
Running
on
Zero
| import random | |
| import hashlib | |
| import numpy as np | |
| import sqlite3 | |
| import re | |
| import traceback | |
| from typing import List, Dict, Tuple, Optional, Any | |
| from dataclasses import dataclass | |
| from sentence_transformers import SentenceTransformer | |
| import torch | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from dog_database import get_dog_description | |
| from breed_health_info import breed_health_info | |
| from breed_noise_info import breed_noise_info | |
| from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores | |
| from query_understanding import QueryUnderstandingEngine, analyze_user_query | |
| from constraint_manager import ConstraintManager, apply_breed_constraints | |
| from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore | |
| from score_calibrator import ScoreCalibrator, calibrate_breed_scores | |
| from config_manager import get_config_manager, get_standardized_breed_data | |
| from semantic_vector_manager import SemanticVectorManager, BreedDescriptionVector | |
| from user_query_analyzer import UserQueryAnalyzer | |
| from matching_score_calculator import MatchingScoreCalculator | |
class SemanticBreedRecommender:
    """
    Enhanced SBERT-based semantic breed recommender (Facade pattern).

    Offers multi-dimensional natural-language understanding for dog breed
    recommendations. Most private methods are thin pass-throughs to the
    vector manager, query analyzer and score calculator created in
    ``__init__``; the public ``get_*_recommendations`` methods orchestrate
    them.
    """

    # Piecewise mapping from a raw compatibility value to a display range:
    # (lower bound of the raw band, display low, display high, raw band width).
    # Raw values below the last lower bound map into the (0.45, 0.54) range.
    _DISPLAY_BANDS = (
        (0.9, 0.92, 0.98, 0.1),     # exceptional match
        (0.75, 0.85, 0.91, 0.15),   # excellent match
        (0.6, 0.75, 0.84, 0.15),    # good match
        (0.45, 0.65, 0.74, 0.15),   # fair match
        (0.3, 0.55, 0.64, 0.15),    # poor match
    )

    def __init__(self):
        """Initialize the recommender and the components it delegates to."""
        # Semantic vector manager (owns the SBERT model and breed vectors)
        self.vector_manager = SemanticVectorManager()
        # User query analyzer
        self.query_analyzer = UserQueryAnalyzer(self.vector_manager.get_breed_list())
        # Matching score calculator
        self.score_calculator = MatchingScoreCalculator(self.vector_manager.get_breed_list())

        # Mirror component state on the facade for backward compatibility
        self.model_name = self.vector_manager.model_name
        self.sbert_model = self.vector_manager.get_sbert_model()
        self.breed_vectors = self.vector_manager.get_breed_vectors()
        self.breed_list = self.vector_manager.get_breed_list()
        self.comparative_keywords = self.query_analyzer.comparative_keywords

        # Enhanced system components (optional)
        try:
            self.query_engine = QueryUnderstandingEngine()
            self.constraint_manager = ConstraintManager()
            self.multi_head_scorer = None
            self.score_calibrator = ScoreCalibrator()
            self.config_manager = get_config_manager()

            # The multi-head scorer is only built when an SBERT model exists
            if self.sbert_model:
                self.multi_head_scorer = MultiHeadScorer(self.sbert_model)
                print("Multi-head scorer initialized with SBERT model")
        except ImportError:
            # NOTE(review): only ImportError is handled here; any other
            # exception raised by the constructors above propagates.
            print("Enhanced system components not available, using basic functionality")
            self.query_engine = None
            self.constraint_manager = None
            self.multi_head_scorer = None
            self.score_calibrator = None
            self.config_manager = None

    # ------------------------------------------------------------------
    # Thin delegating wrappers
    # ------------------------------------------------------------------

    def _parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
        """Parse comparative preference expressions (delegates to the query analyzer)."""
        return self.query_analyzer.parse_comparative_preferences(user_input)

    def _extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
        """Extract lifestyle keywords from the input (delegates to the query analyzer)."""
        return self.query_analyzer.extract_lifestyle_keywords(user_input)

    def _apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]:
        """Apply the size-distribution correction (delegates to the score calculator)."""
        return self.score_calculator.apply_size_distribution_correction(recommendations)

    def _normalize_breed_size(self, size: str) -> str:
        """Normalize a breed size label (delegates to the score calculator)."""
        return self.score_calculator._normalize_breed_size(size)

    def _parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
        """Parse user requirements from the input (delegates to the query analyzer)."""
        return self.query_analyzer.parse_user_requirements(user_input)

    def _apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float:
        """Return the hard-constraint penalty for a breed (delegates to the score calculator)."""
        return self.score_calculator.apply_hard_constraints(breed, user_input, breed_characteristics)

    def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any],
                                   lifestyle_keywords: Dict[str, List[str]]) -> float:
        """Return the lifestyle-match bonus (delegates to the score calculator)."""
        return self.score_calculator.calculate_lifestyle_bonus(breed_characteristics, lifestyle_keywords)

    def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]:
        """Apply intelligent trait matching to recommendations (delegates to the score calculator)."""
        return self.score_calculator.apply_intelligent_trait_matching(recommendations, user_input)

    def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]:
        """Convert standardized breed info into dictionary form (delegates to the score calculator)."""
        return self.score_calculator.get_breed_info_from_standardized(standardized_info)

    def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]:
        """Get fallback recommendations (delegates to the score calculator)."""
        return self.score_calculator.get_fallback_recommendations(top_k)

    # ------------------------------------------------------------------
    # Pure scoring helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _stable_seed(key: str) -> int:
        """Derive an RNG seed from ``key`` that is identical in every process.

        The built-in ``hash()`` of a string is salted per interpreter run, so
        it cannot be used for reproducible tie-breaking.
        """
        digest = hashlib.sha256(key.encode('utf-8')).digest()
        return int.from_bytes(digest[:8], 'big')

    @staticmethod
    def _feature_match_score(user_text: str, breed_info: Optional[Dict[str, Any]]) -> float:
        """Score how well a breed's size/exercise/family traits fit the query text.

        Args:
            user_text: Lower-cased user description.
            breed_info: Breed description mapping, or a falsy value when unknown.

        Returns:
            Sum of the per-trait bonuses and penalties (0.0 when nothing applies).
        """
        breed_size = breed_info.get('Size', '').lower() if breed_info else ''
        exercise_needs = breed_info.get('Exercise Needs', '').lower() if breed_info else ''
        feature_score = 0.0

        # Size and space requirements (high weight)
        if any(term in user_text for term in ['apartment', 'small', 'limited space']):
            if 'small' in breed_size:
                feature_score += 0.25
            elif 'medium' in breed_size:
                feature_score += 0.05
            elif 'large' in breed_size or 'giant' in breed_size:
                feature_score -= 0.30

        # Exercise requirements (high weight)
        if any(term in user_text for term in ['low exercise', 'minimal exercise', "doesn't need", 'not much']):
            if 'low' in exercise_needs or 'minimal' in exercise_needs:
                feature_score += 0.20
            elif 'high' in exercise_needs:  # also covers 'very high'
                feature_score -= 0.25
        elif any(term in user_text for term in ['active', 'high exercise', 'running', 'hiking']):
            if 'high' in exercise_needs:
                feature_score += 0.20
            elif 'low' in exercise_needs:
                feature_score -= 0.15

        # Family compatibility
        if any(term in user_text for term in ['children', 'kids', 'family']):
            good_with_children = breed_info.get('Good with Children', '') if breed_info else ''
            if good_with_children == 'Yes':
                feature_score += 0.10
            elif good_with_children == 'No':
                feature_score -= 0.20

        return feature_score

    @classmethod
    def _band_display_score(cls, base_compatibility: float) -> float:
        """Map a raw compatibility value onto its display band (before jitter/clamping)."""
        for floor, low, high, width in cls._DISPLAY_BANDS:
            if base_compatibility >= floor:
                position = (base_compatibility - floor) / width
                break
        else:
            # Very poor match
            low, high = 0.45, 0.54
            position = max(0, base_compatibility / 0.3)
        return low + (position * (high - low))

    # ------------------------------------------------------------------
    # Public recommendation entry points
    # ------------------------------------------------------------------

    def get_enhanced_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
        """
        Enhanced multi-dimensional semantic breed recommendation.

        Runs query understanding, constraint filtering, multi-head scoring and
        score calibration, then post-processes the result. Falls back to
        ``get_semantic_recommendations`` when the query engine or multi-head
        scorer is missing, or when any stage raises.

        Args:
            user_input: The user's natural-language description.
            top_k: Number of recommendations to return.

        Returns:
            List of recommendation dictionaries with enhanced scoring.
        """
        try:
            # Stage 1: query understanding
            if not self.query_engine:
                print("Query engine not available, using basic analysis")
                return self.get_semantic_recommendations(user_input, top_k)
            dimensions = self.query_engine.analyze_query(user_input)
            dimension_count = len(
                dimensions.spatial_constraints
                + dimensions.activity_level
                + dimensions.noise_preferences
                + dimensions.size_preferences
                + dimensions.family_context
                + dimensions.maintenance_level
                + dimensions.special_requirements
            )
            print(f"Query dimensions detected: {dimension_count} total dimensions")

            # Stage 2: apply constraints
            if self.constraint_manager:
                filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k))
                print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates")
                if not filter_result.passed_breeds:
                    error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements."
                    print(f"ERROR: {error_msg}")
                    # Caught by the handler below, which falls back to basic matching
                    raise ValueError(error_msg)
            else:
                print("Constraint manager not available, using all breeds")
                # Ad-hoc stand-in exposing the attributes read further down
                filter_result = type('FilterResult', (), {
                    'passed_breeds': self.breed_list,
                    'applied_constraints': [],
                    'relaxed_constraints': [],
                    'warnings': []
                })()

            # Stage 3: multi-head scoring
            if not self.multi_head_scorer:
                print("Multi-head scorer not available, using fallback scoring")
                return self.get_semantic_recommendations(user_input, top_k)
            breed_scores = self.multi_head_scorer.score_breeds(filter_result.passed_breeds, dimensions)
            print(f"Multi-head scoring completed for {len(breed_scores)} breeds")

            # Stage 4: score calibration
            if self.score_calibrator:
                breed_score_tuples = [(score.breed_name, score.final_score) for score in breed_scores]
                calibration_result = self.score_calibrator.calibrate_scores(breed_score_tuples)
                print(f"Score calibration: method={calibration_result.calibration_method}")
            else:
                print("Score calibrator not available, using raw scores")
                calibration_result = type('CalibrationResult', (), {
                    'score_mapping': {score.breed_name: score.final_score for score in breed_scores},
                    'calibration_method': 'none'
                })()

            # Stage 5: build the final recommendations
            # NOTE(review): takes the first top_k entries as returned by the
            # scorer — presumably already ranked; confirm in MultiHeadScorer.
            final_recommendations = []
            for i, breed_score in enumerate(breed_scores[:top_k]):
                breed_name = breed_score.breed_name
                breed_key = breed_name.replace(' ', '_')

                # Calibrated score, defaulting to the raw score
                calibrated_score = calibration_result.score_mapping.get(breed_name, breed_score.final_score)

                # Prefer standardized breed data, else the plain description
                standardized_info = get_standardized_breed_data(breed_key) if self.config_manager else None
                if standardized_info:
                    breed_info = self._get_breed_info_from_standardized(standardized_info)
                else:
                    breed_info = get_dog_description(breed_key) or {}

                final_recommendations.append({
                    'breed': breed_name,
                    'rank': i + 1,
                    'overall_score': calibrated_score,
                    'final_score': calibrated_score,
                    'semantic_score': breed_score.semantic_component,
                    'attribute_score': breed_score.attribute_component,
                    'bidirectional_bonus': breed_score.bidirectional_bonus,
                    'confidence_score': breed_score.confidence_score,
                    'dimensional_breakdown': breed_score.dimensional_breakdown,
                    'explanation': breed_score.explanation,
                    'size': breed_info.get('Size', 'Unknown'),
                    'temperament': breed_info.get('Temperament', ''),
                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                    'good_with_children': breed_info.get('Good with Children', 'Yes'),
                    'lifespan': breed_info.get('Lifespan', '10-12 years'),
                    'description': breed_info.get('Description', ''),
                    'search_type': 'enhanced_description',
                    'calibration_method': calibration_result.calibration_method,
                    'applied_constraints': filter_result.applied_constraints,
                    'relaxed_constraints': filter_result.relaxed_constraints,
                    'warnings': filter_result.warnings
                })

            # Size-distribution correction
            corrected_recommendations = self._apply_size_distribution_correction(final_recommendations)

            # Stage 6: intelligent trait matching
            intelligence_enhanced_recommendations = self._apply_intelligent_trait_matching(corrected_recommendations, user_input)

            print(f"Generated {len(intelligence_enhanced_recommendations)} enhanced semantic recommendations with intelligent trait matching")
            return intelligence_enhanced_recommendations

        except Exception as e:
            print(f"Error in enhanced semantic recommendations: {str(e)}")
            print(traceback.format_exc())
            # Fall back to the basic method
            return self.get_semantic_recommendations(user_input, top_k)

    def get_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
        """
        Get breed recommendations from a natural-language description.

        Args:
            user_input: The user's natural-language description.
            top_k: Number of recommendations to return.

        Returns:
            List of recommendation dictionaries ordered by display score, or
            an empty list when the SBERT model is unavailable or any step
            raises (the error is printed, not propagated).
        """
        try:
            print(f"Processing user input: {user_input}")

            # The model is required; the error is reported via the handler below
            if self.sbert_model is None:
                error_msg = "SBERT model not available. This could be due to:\n• Model download failed\n• Insufficient memory\n• Network connectivity issues\n\nPlease check your environment and try again."
                print(f"ERROR: {error_msg}")
                raise RuntimeError(error_msg)

            user_embedding = self.vector_manager.encode_text(user_input)
            comparative_prefs = self._parse_comparative_preferences(user_input)
            lifestyle_keywords = self._extract_lifestyle_keywords(user_input)

            # Pass 1: per-breed similarity and bonuses, skipping disqualified breeds
            similarities = []
            for breed, breed_vector in self.breed_vectors.items():
                constraint_penalty = self._apply_hard_constraints(breed, user_input, breed_vector.characteristics)
                if constraint_penalty <= -1.0:  # fully disqualified
                    continue

                semantic_score = cosine_similarity(
                    [user_embedding],
                    [breed_vector.embedding]
                )[0][0]
                comparative_bonus = comparative_prefs.get(breed, 0.0)
                lifestyle_bonus = self._calculate_lifestyle_bonus(
                    breed_vector.characteristics,
                    lifestyle_keywords
                )
                # Fold the constraint penalty into the lifestyle bonus
                lifestyle_bonus += constraint_penalty

                similarities.append({
                    'breed': breed,
                    'semantic_score': semantic_score,
                    'comparative_bonus': comparative_bonus,
                    'lifestyle_bonus': lifestyle_bonus
                })

            # Pass 2: normalized display scores
            all_semantic_scores = [entry['semantic_score'] for entry in similarities]
            semantic_mean = np.mean(all_semantic_scores)
            semantic_std = np.std(all_semantic_scores) if len(all_semantic_scores) > 1 else 1.0

            user_text = user_input.lower()
            seed_suffix = user_input[:15]
            breed_display_scores = []
            for entry in similarities:
                breed = entry['breed']
                base_semantic = entry['semantic_score']

                # z-score the semantic similarity, clipped to 2 std devs,
                # which maps it into the 0.3-0.7 range
                if semantic_std > 0:
                    normalized_semantic = (base_semantic - semantic_mean) / semantic_std
                    normalized_semantic = max(-2.0, min(2.0, normalized_semantic))
                    scaled_semantic = 0.5 + (normalized_semantic * 0.1)
                else:
                    scaled_semantic = 0.5

                breed_info = get_dog_description(breed) if breed != 'Unknown' else {}
                feature_score = self._feature_match_score(user_text, breed_info)

                # Weighted blend; feature matching outweighs raw semantics
                base_compatibility = (
                    scaled_semantic * 0.35 +
                    feature_score * 0.45 +
                    entry['lifestyle_bonus'] * 0.15 +
                    entry['comparative_bonus'] * 0.05
                )
                base_score = self._band_display_score(base_compatibility)

                # Small tie-breaking jitter from a private, stably-seeded RNG so
                # results are reproducible and global random state is untouched
                jitter_rng = random.Random(self._stable_seed(breed + seed_suffix))
                variation = jitter_rng.uniform(-0.015, 0.015)
                display_score = round(max(0.45, min(0.98, base_score + variation)), 3)

                breed_display_scores.append({
                    'breed': breed,
                    'display_score': display_score,
                    'semantic_score': base_semantic,
                    'comparative_bonus': entry['comparative_bonus'],
                    'lifestyle_bonus': entry['lifestyle_bonus']
                })

            # Rank by display score so rank and shown score always agree
            breed_display_scores.sort(key=lambda x: x['display_score'], reverse=True)
            top_breeds = breed_display_scores[:top_k]

            # Convert to the standard recommendation format
            recommendations = []
            for i, breed_data in enumerate(top_breeds):
                breed = breed_data['breed']
                display_score = breed_data['display_score']
                breed_info = get_dog_description(breed) or {}

                recommendations.append({
                    'breed': breed.replace('_', ' '),
                    'rank': i + 1,
                    'overall_score': display_score,
                    'final_score': display_score,  # kept equal to overall_score
                    'semantic_score': breed_data['semantic_score'],
                    'comparative_bonus': breed_data['comparative_bonus'],
                    'lifestyle_bonus': breed_data['lifestyle_bonus'],
                    'size': breed_info.get('Size', 'Unknown'),
                    'temperament': breed_info.get('Temperament', ''),
                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                    'good_with_children': breed_info.get('Good with Children', 'Yes'),
                    'lifespan': breed_info.get('Lifespan', '10-12 years'),
                    'description': breed_info.get('Description', ''),
                    'search_type': 'description'
                })

            print(f"Generated {len(recommendations)} semantic recommendations")
            return recommendations

        except Exception as e:
            print(f"Failed to generate semantic recommendations: {str(e)}")
            print(traceback.format_exc())
            return []

    def get_enhanced_recommendations_with_unified_scoring(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
        """Simplified enhanced recommendation; currently uses basic semantic matching.

        Raises:
            RuntimeError: If the underlying call raises.
        """
        try:
            print(f"Processing enhanced recommendation: {user_input[:50]}...")
            return self.get_semantic_recommendations(user_input, top_k)
        except Exception as e:
            error_msg = f"Enhanced recommendation error: {str(e)}. Please check your description."
            print(f"ERROR: {error_msg}")
            print(traceback.format_exc())
            raise RuntimeError(error_msg) from e

    def _analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
        """Analyze the user description (delegates to the query analyzer)."""
        return self.query_analyzer.analyze_user_description_enhanced(user_description)

    def _create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> "UserPreferences":
        """Build a user-preferences object from an analysis result (delegates to the query analyzer)."""
        return self.query_analyzer.create_user_preferences_from_analysis_enhanced(analysis)

    def _get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
        """Get the candidate breed list (delegates to the query analyzer)."""
        return self.query_analyzer.get_candidate_breeds_enhanced(analysis)

    def _apply_constraint_filtering_enhanced(self, breed: str, analysis: Dict[str, Any]) -> float:
        """Return a score adjustment for ``breed`` given the analysed constraints.

        This is a basic local implementation; the score calculator exposes no
        equivalent method.

        Args:
            breed: Breed identifier as used by ``get_dog_description``.
            analysis: Analysis result; its ``'constraint_requirements'`` entry
                (treated as empty when absent) lists the active constraints.

        Returns:
            Net adjustment (negative = penalty); 0.0 when the breed is unknown.
        """
        constraint_penalty = 0.0
        breed_info = get_dog_description(breed)
        if not breed_info:
            return constraint_penalty

        # A missing/None entry means "no constraints" rather than an error
        requirements = analysis.get('constraint_requirements') or ()

        # Low-noise requirement
        if 'low_noise' in requirements:
            noise_info = breed_noise_info.get(breed, {})
            noise_level = noise_info.get('noise_level', 'moderate').lower()
            if 'high' in noise_level:
                constraint_penalty -= 0.3  # heavy penalty
            elif 'low' in noise_level:
                constraint_penalty += 0.1  # small bonus

        # Apartment suitability
        if 'apartment_suitable' in requirements:
            size = breed_info.get('Size', '').lower()
            exercise_needs = breed_info.get('Exercise Needs', '').lower()
            if size in ['large', 'giant']:
                constraint_penalty -= 0.2
            elif size in ['small', 'tiny']:
                constraint_penalty += 0.1
            if 'high' in exercise_needs:
                constraint_penalty -= 0.15

        # Child friendliness
        if 'child_friendly' in requirements:
            good_with_children = breed_info.get('Good with Children', 'Unknown')
            if good_with_children == 'Yes':
                constraint_penalty += 0.15
            elif good_with_children == 'No':
                constraint_penalty -= 0.4  # heavy penalty

        return constraint_penalty

    def _get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]:
        """Get breed characteristics (delegates to the score calculator)."""
        return self.score_calculator.get_breed_characteristics_enhanced(breed)

    def get_hybrid_recommendations(self, user_description: str,
                                   user_preferences: Optional[Any] = None,
                                   top_k: int = 15) -> List[Dict[str, Any]]:
        """
        Hybrid recommendation: semantic matching combined with traditional scoring.

        Args:
            user_description: The user's natural-language description.
            user_preferences: Optional structured preferences; when falsy the
                plain semantic results are returned.
            top_k: Number of recommendations to return.

        Returns:
            Recommendations re-ranked by hybrid score (40% semantic, 60%
            traditional). On any error, falls back to plain semantic results.
        """
        try:
            # Over-fetch so the re-ranking has room to reorder
            semantic_recommendations = self.get_semantic_recommendations(user_description, top_k * 2)

            if not user_preferences:
                return semantic_recommendations[:top_k]

            # Combine with the traditional compatibility score
            hybrid_results = []
            for semantic_rec in semantic_recommendations:
                breed_name = semantic_rec['breed'].replace(' ', '_')
                traditional_score = calculate_compatibility_score(user_preferences, breed_name)

                hybrid_score = (
                    semantic_rec['overall_score'] * 0.4 +
                    traditional_score * 0.6
                )

                semantic_rec['hybrid_score'] = hybrid_score
                semantic_rec['traditional_score'] = traditional_score
                hybrid_results.append(semantic_rec)

            # Re-rank by hybrid score and refresh rank / overall score
            hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)
            for i, result in enumerate(hybrid_results[:top_k]):
                result['rank'] = i + 1
                result['overall_score'] = result['hybrid_score']

            return hybrid_results[:top_k]

        except Exception as e:
            print(f"Hybrid recommendation failed: {str(e)}")
            print(traceback.format_exc())
            return self.get_semantic_recommendations(user_description, top_k)
def get_breed_recommendations_by_description(user_description: str,
                                             user_preferences: Optional[Any] = None,
                                             top_k: int = 15) -> List[Dict[str, Any]]:
    """Main entry point for description-based breed recommendations.

    Tries, in order: the unified-scoring method, the enhanced semantic
    method, then the hybrid method (when ``user_preferences`` is truthy) or
    the plain semantic method. The first non-empty result is returned.

    Args:
        user_description: The user's natural-language description.
        user_preferences: Optional structured preferences for the hybrid step.
        top_k: Number of recommendations to return.

    Raises:
        RuntimeError: If every strategy yields nothing or any step raises.
    """
    try:
        print("Initializing Enhanced SemanticBreedRecommender...")
        engine = SemanticBreedRecommender()

        # First choice: enhanced recommendations with unified scoring
        print("Using enhanced recommendation system with unified scoring")
        unified = engine.get_enhanced_recommendations_with_unified_scoring(user_description, top_k)
        if unified:
            print(f"Generated {len(unified)} enhanced recommendations successfully")
            return unified

        # Second choice: the original enhanced system
        print("Enhanced unified system returned no results, trying original enhanced system")
        enhanced = engine.get_enhanced_semantic_recommendations(user_description, top_k)
        if enhanced:
            return enhanced

        # Last resort: the standard system
        print("All enhanced systems failed, using standard system")
        if user_preferences:
            standard = engine.get_hybrid_recommendations(user_description, user_preferences, top_k)
        else:
            standard = engine.get_semantic_recommendations(user_description, top_k)

        if standard:
            return standard

        error_msg = "All recommendation systems failed to generate results. Please check your input description and try again. Error details may be in the console."
        print(f"ERROR: {error_msg}")
        raise RuntimeError(error_msg)

    except Exception as exc:
        # Also re-wraps the RuntimeError raised just above
        error_msg = f"Critical error in recommendation system: {str(exc)}. Please check your input and system configuration."
        print(f"ERROR: {error_msg}")
        print(traceback.format_exc())
        raise RuntimeError(error_msg) from exc
def get_enhanced_recommendations_with_unified_scoring(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """Simplified version: basic semantic recommendation.

    Scores every breed vector against the description, ranks by the enhanced
    ``final_score`` and returns the top ``top_k`` entries. Uses basic text
    matching when the vector manager reports no model.

    Raises:
        RuntimeError: If any step raises.
    """
    try:
        print(f"Processing description-based recommendation: {user_description[:50]}...")

        engine = SemanticBreedRecommender()

        if not engine.vector_manager.is_model_available():
            print("SBERT model not available, using basic text matching...")
            return _get_basic_text_matching_recommendations(user_description, top_k, engine)

        query_vector = engine.vector_manager.encode_text(user_description)

        # Enhanced score for every breed
        scored = []
        for name, vector in engine.breed_vectors.items():
            sim = cosine_similarity([query_vector], [vector.embedding])[0][0]
            info = get_dog_description(name) or {}
            match = engine.score_calculator.calculate_enhanced_matching_score(
                name, info, user_description, sim
            )
            scored.append((name, match, info, sim))

        # Rank by final_score, not by raw semantic similarity
        ranked = sorted(scored, key=lambda item: item[1]['final_score'], reverse=True)

        recommendations = [
            {
                'breed': name.replace('_', ' '),
                'rank': position,
                'overall_score': match['final_score'],
                'final_score': match['final_score'],
                'semantic_score': sim,
                # comparative_bonus is filled from the lifestyle bonus here
                'comparative_bonus': match['lifestyle_bonus'],
                'lifestyle_bonus': match['lifestyle_bonus'],
                'size': info.get('Size', 'Unknown'),
                'temperament': info.get('Temperament', 'Unknown'),
                'exercise_needs': info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': info.get('Grooming Needs', 'Moderate'),
                'good_with_children': info.get('Good with Children', 'Unknown'),
                'lifespan': info.get('Lifespan', '10-12 years'),
                'description': info.get('Description', 'No description available'),
                'search_type': 'description',
                'scores': match['dimension_scores']
            }
            for position, (name, match, info, sim) in enumerate(ranked[:top_k], start=1)
        ]

        print(f"Generated {len(recommendations)} semantic recommendations")
        return recommendations

    except Exception as exc:
        error_msg = f"Error in semantic recommendation system: {str(exc)}. Please check your input and try again."
        print(f"ERROR: {error_msg}")
        print(traceback.format_exc())
        raise RuntimeError(error_msg) from exc
def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15, recommender=None) -> List[Dict[str, Any]]:
    """Basic keyword-matching recommendations (fallback when SBERT is unavailable).

    Args:
        user_description: The user's natural-language description. An empty
            or whitespace-only description gives every breed the minimum
            keyword score instead of failing.
        top_k: Number of recommendations to return.
        recommender: Existing ``SemanticBreedRecommender``; one is created
            when omitted.

    Returns:
        Recommendation dictionaries ordered by enhanced final score.

    Raises:
        RuntimeError: If any step raises.
    """
    try:
        print("Using basic text matching as fallback...")

        if recommender is None:
            recommender = SemanticBreedRecommender()

        # Basic keyword matching
        keywords = user_description.lower().split()
        breed_scores = []

        # Load the breed list from the database, else use the built-in list
        try:
            conn = sqlite3.connect('animal_detector.db')
            try:
                cursor = conn.cursor()
                cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog LIMIT 50")
                basic_breeds = [row[0] for row in cursor.fetchall()]
                cursor.close()
            finally:
                # Always release the connection, even when the query fails
                conn.close()
            # Filter out the wild (non-domestic) entry
            basic_breeds = [breed for breed in basic_breeds if breed != 'Dhole']
        except Exception as e:
            print(f"Could not load breed list from database: {str(e)}")
            # Fallback breed list
            basic_breeds = [
                'Labrador_Retriever', 'Golden_Retriever', 'German_Shepherd', 'French_Bulldog',
                'Border_Collie', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
                'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih_Tzu',
                'Maltese_Dog', 'Chihuahua', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
                'Japanese_Spaniel', 'Toy_Terrier', 'Affenpinscher', 'Pekingese', 'Lhasa'
            ]

        for breed in basic_breeds:
            breed_info = get_dog_description(breed) or {}
            breed_text = f"{breed} {breed_info.get('Temperament', '')} {breed_info.get('Size', '')} {breed_info.get('Description', '')}".lower()

            # Fraction of query keywords found in the breed text; guarded so
            # a description with no keywords does not divide by zero
            matches = sum(1 for keyword in keywords if keyword in breed_text)
            match_ratio = matches / len(keywords) if keywords else 0.0
            base_score = min(0.95, 0.3 + match_ratio * 0.6)

            # Apply the enhanced matching logic on top of the keyword score
            enhanced_score = recommender.score_calculator.calculate_enhanced_matching_score(
                breed, breed_info, user_description, base_score
            )

            breed_scores.append((breed, enhanced_score['final_score'], breed_info, enhanced_score))

        # Rank by final score
        breed_scores.sort(key=lambda x: x[1], reverse=True)

        recommendations = []
        for i, (breed, final_score, breed_info, enhanced_score) in enumerate(breed_scores[:top_k]):
            recommendation = {
                'breed': breed.replace('_', ' '),
                'rank': i + 1,
                'overall_score': final_score,
                'final_score': final_score,
                'semantic_score': enhanced_score.get('weighted_score', final_score),
                'comparative_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'lifestyle_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', 'Unknown'),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Unknown'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', 'No description available'),
                'search_type': 'description',
                # Derived per-dimension scores when the calculator gives none
                'scores': enhanced_score.get('dimension_scores', {
                    'space': final_score * 0.9,
                    'exercise': final_score * 0.85,
                    'grooming': final_score * 0.8,
                    'experience': final_score * 0.75,
                    'noise': final_score * 0.7,
                    'family': final_score * 0.65
                })
            }
            recommendations.append(recommendation)

        return recommendations

    except Exception as e:
        error_msg = f"Error in basic text matching: {str(e)}"
        print(f"ERROR: {error_msg}")
        raise RuntimeError(error_msg) from e