diff --git a/breed_recommendation_enhanced.py b/breed_recommendation_enhanced.py
index c0854fa6df85c3334be3944691b6cda0721ad402..438c0bf6e94aab0904caf27fbf9f9e89e33e3e02 100644
--- a/breed_recommendation_enhanced.py
+++ b/breed_recommendation_enhanced.py
@@ -30,6 +30,8 @@ def create_description_examples():
gap: 15px;
margin-top: 10px;
'>
+
+
- 🏠 Living Environment:
+ 🏡 Active Lifestyle & Space:
- "I live in an apartment and need a quiet, small dog that's good with children"
+ "I live in a large house with a big backyard, and I love hiking and outdoor activities. I don't mind if the dog is noisy, as long as it's active and playful."
+
- 🎾 Activity Preferences:
+ 🎾 Activity Preferences:
"I want an active medium to large dog for hiking and outdoor activities"
+
- ❤️ Breed Preferences:
+ 🚶 Balanced Daily Routine:
- "I love Border Collies most, then Golden Retrievers, followed by Pugs"
+ "I live in a medium-sized house, walk about 30 minutes every day, and I'm okay with a moderately vocal dog. Looking for a balanced companion."
+
- 👥 Family Situation:
+ 👥 Family Situation:
"Looking for a calm, low-maintenance companion dog for elderly person"
@@ -98,7 +103,6 @@ def create_description_examples():
"""
-
def create_recommendation_tab(
UserPreferences,
get_breed_recommendations,
@@ -110,7 +114,7 @@ def create_recommendation_tab(
with gr.TabItem("Breed Recommendation"):
with gr.Tabs():
# --------------------------
- # Tab 1: Find by Criteria
+ # Find by Criteria
# --------------------------
with gr.Tab("Find by Criteria"):
gr.HTML("""
@@ -334,7 +338,7 @@ def create_recommendation_tab(
)
# --------------------------
- # Tab 2: Find by Description
+ # Find by Description
# --------------------------
with gr.Tab("Find by Description") as description_tab:
gr.HTML("""
@@ -639,4 +643,4 @@ def create_recommendation_tab(
'criteria_results': locals().get('criteria_results'),
'description_results': locals().get('description_results'),
'description_input': locals().get('description_input')
- }
\ No newline at end of file
+ }
diff --git a/matching_score_calculator.py b/matching_score_calculator.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef8a5d02dcf463dcf8af920f698c9324e934dc72
--- /dev/null
+++ b/matching_score_calculator.py
@@ -0,0 +1,974 @@
+import random
+import hashlib
+import numpy as np
+import sqlite3
+import re
+import traceback
+from typing import List, Dict, Tuple, Optional, Any
+from dataclasses import dataclass
+from sentence_transformers import SentenceTransformer
+import torch
+from sklearn.metrics.pairwise import cosine_similarity
+from dog_database import get_dog_description
+from breed_health_info import breed_health_info
+from breed_noise_info import breed_noise_info
+from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores
+from query_understanding import QueryUnderstandingEngine, analyze_user_query
+from constraint_manager import ConstraintManager, apply_breed_constraints
+from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore
+from score_calibrator import ScoreCalibrator, calibrate_breed_scores
+from config_manager import get_config_manager, get_standardized_breed_data
+
+class MatchingScoreCalculator:
+ """
+ Matching score calculator.
+ Handles multi-dimensional match scoring, constraint filtering and score calibration.
+ """
+
+ def __init__(self, breed_list: List[str]):
+     """Initialise the calculator with the breeds it works on.
+
+     Args:
+         breed_list: Breed names, stored unchanged on ``self.breed_list``.
+     """
+     self.breed_list = breed_list
+
+ def apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]:
+ """應用尺寸分佈修正以防止大型品種偏差"""
+ if len(recommendations) < 10:
+ return recommendations
+
+ # 分析尺寸分佈
+ size_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0}
+
+ for rec in recommendations:
+ breed_info = get_dog_description(rec['breed'])
+ if breed_info:
+ size = self._normalize_breed_size(breed_info.get('Size', 'Medium'))
+ size_counts[size] += 1
+
+ total_recs = len(recommendations)
+ large_giant_ratio = (size_counts['large'] + size_counts['giant']) / total_recs
+
+ # 如果超過 70% 是大型/巨型品種,應用修正
+ if large_giant_ratio > 0.7:
+ corrected_recommendations = []
+ size_quotas = {'toy': 2, 'small': 4, 'medium': 6, 'large': 2, 'giant': 1}
+ current_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0}
+
+ # 第一輪:在配額內添加品種
+ for rec in recommendations:
+ breed_info = get_dog_description(rec['breed'])
+ if breed_info:
+ size = self._normalize_breed_size(breed_info.get('Size', 'Medium'))
+ if current_counts[size] < size_quotas[size]:
+ corrected_recommendations.append(rec)
+ current_counts[size] += 1
+
+ # 第二輪:用最佳剩餘候選品種填滿剩餘位置
+ remaining_slots = 15 - len(corrected_recommendations)
+ remaining_breeds = [rec for rec in recommendations if rec not in corrected_recommendations]
+
+ corrected_recommendations.extend(remaining_breeds[:remaining_slots])
+ return corrected_recommendations
+
+ return recommendations
+
+ def _normalize_breed_size(self, size: str) -> str:
+ """標準化品種尺寸到標準分類"""
+ if not isinstance(size, str):
+ return 'medium'
+
+ size_lower = size.lower()
+ if any(term in size_lower for term in ['toy', 'tiny']):
+ return 'toy'
+ elif 'small' in size_lower:
+ return 'small'
+ elif 'medium' in size_lower:
+ return 'medium'
+ elif 'large' in size_lower:
+ return 'large'
+ elif any(term in size_lower for term in ['giant', 'extra large']):
+ return 'giant'
+ else:
+ return 'medium'
+
+ def apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float:
+ """增強硬約束,具有更嚴格的懲罰"""
+ penalty = 0.0
+ user_text_lower = user_input.lower()
+
+ # 獲取品種信息
+ breed_info = get_dog_description(breed)
+ if not breed_info:
+ return 0.0
+
+ breed_size = breed_info.get('Size', '').lower()
+ exercise_needs = breed_info.get('Exercise Needs', '').lower()
+
+ # 公寓居住約束 - 更嚴格
+ if any(term in user_text_lower for term in ['apartment', 'flat', 'studio', 'small space']):
+ if 'giant' in breed_size:
+ return -2.0 # 完全淘汰
+ elif 'large' in breed_size:
+ if any(term in exercise_needs for term in ['high', 'very high']):
+ return -2.0 # 完全淘汰
+ else:
+ penalty -= 0.5 # 仍有顯著懲罰
+ elif 'medium' in breed_size and 'very high' in exercise_needs:
+ penalty -= 0.6
+
+ # 運動不匹配約束
+ if "don't exercise much" in user_text_lower or "low exercise" in user_text_lower:
+ if any(term in exercise_needs for term in ['very high', 'extreme', 'intense']):
+ return -2.0 # 完全淘汰
+ elif 'high' in exercise_needs:
+ penalty -= 0.8
+
+ # 中等生活方式檢測
+ if any(term in user_text_lower for term in ['moderate', 'balanced', '30 minutes', 'half hour']):
+ # 懲罰極端情況
+ if 'giant' in breed_size:
+ penalty -= 0.7 # 對巨型犬的強懲罰
+ elif 'very high' in exercise_needs:
+ penalty -= 0.5
+
+ # 兒童安全(現有邏輯保持但增強)
+ if any(term in user_text_lower for term in ['child', 'kids', 'family', 'baby']):
+ good_with_children = breed_info.get('Good with Children', '').lower()
+ if good_with_children == 'no':
+ return -2.0 # 為了安全完全淘汰
+
+ return penalty
+
+ def calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any],
+ lifestyle_keywords: Dict[str, List[str]]) -> float:
+ """增強生活方式匹配獎勵計算"""
+ bonus = 0.0
+ penalties = 0.0
+
+ # 增強尺寸匹配
+ breed_size = breed_characteristics.get('size', '').lower()
+ size_prefs = lifestyle_keywords.get('size_preference', [])
+ for pref in size_prefs:
+ if pref in breed_size:
+ bonus += 0.25 # 尺寸匹配的強獎勵
+ elif (pref == 'small' and 'large' in breed_size) or \
+ (pref == 'large' and 'small' in breed_size):
+ penalties += 0.15 # 尺寸不匹配的懲罰
+
+ # 增強活動水平匹配
+ breed_exercise = breed_characteristics.get('exercise_needs', '').lower()
+ activity_prefs = lifestyle_keywords.get('activity_level', [])
+
+ if 'high' in activity_prefs:
+ if 'high' in breed_exercise or 'very high' in breed_exercise:
+ bonus += 0.2
+ elif 'low' in breed_exercise:
+ penalties += 0.2
+ elif 'low' in activity_prefs:
+ if 'low' in breed_exercise:
+ bonus += 0.2
+ elif 'high' in breed_exercise or 'very high' in breed_exercise:
+ penalties += 0.25
+ elif 'moderate' in activity_prefs:
+ if 'moderate' in breed_exercise:
+ bonus += 0.15
+
+ # 增強家庭情況匹配
+ good_with_children = breed_characteristics.get('good_with_children', 'Yes')
+ family_prefs = lifestyle_keywords.get('family_situation', [])
+
+ if 'children' in family_prefs:
+ if good_with_children == 'Yes':
+ bonus += 0.15
+ else:
+ penalties += 0.3 # 對非兒童友好品種的強懲罰
+
+ # 增強居住空間匹配
+ living_prefs = lifestyle_keywords.get('living_space', [])
+ if 'apartment' in living_prefs:
+ if 'small' in breed_size:
+ bonus += 0.2
+ elif 'medium' in breed_size and 'low' in breed_exercise:
+ bonus += 0.1
+ elif 'large' in breed_size or 'giant' in breed_size:
+ penalties += 0.2 # 公寓中大型犬的懲罰
+
+ # 噪音偏好匹配
+ noise_prefs = lifestyle_keywords.get('noise_preference', [])
+ temperament = breed_characteristics.get('temperament', '').lower()
+
+ if 'low' in noise_prefs:
+ # 獎勵安靜品種
+ if any(term in temperament for term in ['gentle', 'calm', 'quiet']):
+ bonus += 0.1
+
+ # 照護水平匹配
+ grooming_needs = breed_characteristics.get('grooming_needs', '').lower()
+ care_prefs = lifestyle_keywords.get('care_level', [])
+
+ if 'low' in care_prefs and 'low' in grooming_needs:
+ bonus += 0.1
+ elif 'high' in care_prefs and 'high' in grooming_needs:
+ bonus += 0.1
+ elif 'low' in care_prefs and 'high' in grooming_needs:
+ penalties += 0.15
+
+ # 特殊需求匹配
+ special_needs = lifestyle_keywords.get('special_needs', [])
+
+ if 'guard' in special_needs:
+ if any(term in temperament for term in ['protective', 'alert', 'watchful']):
+ bonus += 0.1
+ elif 'companion' in special_needs:
+ if any(term in temperament for term in ['affectionate', 'gentle', 'loyal']):
+ bonus += 0.1
+
+ # 計算包含懲罰的最終獎勵
+ final_bonus = bonus - penalties
+ return max(-0.3, min(0.5, final_bonus)) # 允許負獎勵但限制範圍
+
+ def apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]:
+ """基於增強關鍵字提取和數據庫挖掘應用智能特徵匹配"""
+ try:
+ # 從用戶輸入提取增強關鍵字
+ extracted_keywords = self._extract_enhanced_lifestyle_keywords(user_input)
+
+ # 對每個推薦應用智能特徵匹配
+ enhanced_recommendations = []
+
+ for rec in recommendations:
+ breed_name = rec['breed'].replace(' ', '_')
+
+ # 獲取品種數據庫信息
+ breed_info = get_dog_description(breed_name) or {}
+
+ # 計算智能特徵獎勵
+ intelligence_bonus = 0.0
+ trait_match_details = {}
+
+ # 1. 智力匹配
+ if extracted_keywords.get('intelligence_preference'):
+ intelligence_pref = extracted_keywords['intelligence_preference'][0]
+ breed_desc = breed_info.get('Description', '').lower()
+
+ if intelligence_pref == 'high':
+ if any(word in breed_desc for word in ['intelligent', 'smart', 'clever', 'quick to learn', 'trainable']):
+ intelligence_bonus += 0.05
+ trait_match_details['intelligence_match'] = 'High intelligence match detected'
+ elif any(word in breed_desc for word in ['stubborn', 'independent', 'difficult']):
+ intelligence_bonus -= 0.02
+ trait_match_details['intelligence_warning'] = 'May be challenging to train'
+
+ elif intelligence_pref == 'independent':
+ if any(word in breed_desc for word in ['independent', 'stubborn', 'strong-willed']):
+ intelligence_bonus += 0.03
+ trait_match_details['independence_match'] = 'Independent nature match'
+
+ # 2. 美容偏好匹配
+ if extracted_keywords.get('grooming_preference'):
+ grooming_pref = extracted_keywords['grooming_preference'][0]
+ breed_grooming = breed_info.get('Grooming Needs', '').lower()
+
+ if grooming_pref == 'low' and 'low' in breed_grooming:
+ intelligence_bonus += 0.03
+ trait_match_details['grooming_match'] = 'Low maintenance grooming match'
+ elif grooming_pref == 'high' and 'high' in breed_grooming:
+ intelligence_bonus += 0.03
+ trait_match_details['grooming_match'] = 'High maintenance grooming match'
+ elif grooming_pref == 'low' and 'high' in breed_grooming:
+ intelligence_bonus -= 0.04
+ trait_match_details['grooming_mismatch'] = 'High grooming needs may not suit preferences'
+
+ # 3. 氣質偏好匹配
+ if extracted_keywords.get('temperament_preference'):
+ temp_prefs = extracted_keywords['temperament_preference']
+ breed_temperament = breed_info.get('Temperament', '').lower()
+ breed_desc = breed_info.get('Description', '').lower()
+
+ temp_text = (breed_temperament + ' ' + breed_desc).lower()
+
+ for temp_pref in temp_prefs:
+ if temp_pref == 'gentle' and any(word in temp_text for word in ['gentle', 'calm', 'peaceful', 'mild']):
+ intelligence_bonus += 0.04
+ trait_match_details['temperament_match'] = f'Gentle temperament match: {temp_pref}'
+ elif temp_pref == 'playful' and any(word in temp_text for word in ['playful', 'energetic', 'lively', 'fun']):
+ intelligence_bonus += 0.04
+ trait_match_details['temperament_match'] = f'Playful temperament match: {temp_pref}'
+ elif temp_pref == 'protective' and any(word in temp_text for word in ['protective', 'guard', 'alert', 'watchful']):
+ intelligence_bonus += 0.04
+ trait_match_details['temperament_match'] = f'Protective temperament match: {temp_pref}'
+ elif temp_pref == 'friendly' and any(word in temp_text for word in ['friendly', 'social', 'outgoing', 'people']):
+ intelligence_bonus += 0.04
+ trait_match_details['temperament_match'] = f'Friendly temperament match: {temp_pref}'
+
+ # 4. 經驗水平匹配
+ if extracted_keywords.get('experience_level'):
+ exp_level = extracted_keywords['experience_level'][0]
+ breed_desc = breed_info.get('Description', '').lower()
+
+ if exp_level == 'beginner':
+ # 為初學者偏愛易於處理的品種
+ if any(word in breed_desc for word in ['easy', 'gentle', 'good for beginners', 'family', 'calm']):
+ intelligence_bonus += 0.06
+ trait_match_details['beginner_friendly'] = 'Good choice for first-time owners'
+ elif any(word in breed_desc for word in ['challenging', 'dominant', 'requires experience', 'strong-willed']):
+ intelligence_bonus -= 0.08
+ trait_match_details['experience_warning'] = 'May be challenging for first-time owners'
+
+ elif exp_level == 'advanced':
+ # 高級用戶可以處理更具挑戰性的品種
+ if any(word in breed_desc for word in ['working', 'requires experience', 'intelligent', 'strong']):
+ intelligence_bonus += 0.03
+ trait_match_details['advanced_suitable'] = 'Good match for experienced owners'
+
+ # 5. 壽命偏好匹配
+ if extracted_keywords.get('lifespan_preference'):
+ lifespan_pref = extracted_keywords['lifespan_preference'][0]
+ breed_lifespan = breed_info.get('Lifespan', '10-12 years')
+
+ try:
+ import re
+ years = re.findall(r'\d+', breed_lifespan)
+ if years:
+ avg_years = sum(int(y) for y in years) / len(years)
+ if lifespan_pref == 'long' and avg_years >= 13:
+ intelligence_bonus += 0.02
+ trait_match_details['longevity_match'] = f'Long lifespan match: {breed_lifespan}'
+ elif lifespan_pref == 'healthy' and avg_years >= 12:
+ intelligence_bonus += 0.02
+ trait_match_details['health_match'] = f'Healthy lifespan: {breed_lifespan}'
+ except:
+ pass
+
+ # 將智力獎勵應用到總分
+ original_score = rec['overall_score']
+ enhanced_score = min(1.0, original_score + intelligence_bonus)
+
+ # 創建包含特徵匹配詳細信息的增強推薦
+ enhanced_rec = rec.copy()
+ enhanced_rec['overall_score'] = enhanced_score
+ enhanced_rec['intelligence_bonus'] = intelligence_bonus
+ enhanced_rec['trait_match_details'] = trait_match_details
+
+ # 如果發生顯著增強,添加詳細說明
+ if abs(intelligence_bonus) > 0.02:
+ enhancement_explanation = []
+ for detail_key, detail_value in trait_match_details.items():
+ enhancement_explanation.append(detail_value)
+
+ if enhancement_explanation:
+ current_explanation = enhanced_rec.get('explanation', '')
+ enhanced_explanation = current_explanation + f" Enhanced matching: {'; '.join(enhancement_explanation)}"
+ enhanced_rec['explanation'] = enhanced_explanation
+
+ enhanced_recommendations.append(enhanced_rec)
+
+ # 按增強總分重新排序
+ enhanced_recommendations.sort(key=lambda x: x['overall_score'], reverse=True)
+
+ # 更新排名
+ for i, rec in enumerate(enhanced_recommendations):
+ rec['rank'] = i + 1
+
+ print(f"Applied intelligent trait matching with average bonus: {sum(r['intelligence_bonus'] for r in enhanced_recommendations) / len(enhanced_recommendations):.3f}")
+
+ return enhanced_recommendations
+
+ except Exception as e:
+ print(f"Error in intelligent trait matching: {str(e)}")
+ # 如果特徵匹配失敗,返回原始推薦
+ return recommendations
+
+ def _extract_enhanced_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
+ """提取增強的生活方式關鍵字(用於智能特徵匹配)"""
+ keywords = {
+ 'intelligence_preference': [],
+ 'grooming_preference': [],
+ 'temperament_preference': [],
+ 'experience_level': [],
+ 'lifespan_preference': []
+ }
+
+ text = user_input.lower()
+
+ # 智力偏好檢測
+ smart_terms = ['smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train', 'trainable', 'genius', 'brilliant']
+ independent_terms = ['independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves']
+
+ if any(term in text for term in smart_terms):
+ keywords['intelligence_preference'].append('high')
+ if any(term in text for term in independent_terms):
+ keywords['intelligence_preference'].append('independent')
+
+ # 美容偏好檢測
+ low_grooming_terms = ['low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat']
+ high_grooming_terms = ['high grooming', 'professional grooming', 'lots of care', 'high maintenance coat', 'daily brushing', 'regular grooming']
+
+ if any(term in text for term in low_grooming_terms):
+ keywords['grooming_preference'].append('low')
+ if any(term in text for term in high_grooming_terms):
+ keywords['grooming_preference'].append('high')
+
+ # 氣質偏好檢測
+ gentle_terms = ['gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile']
+ playful_terms = ['playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy']
+ protective_terms = ['protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive']
+ friendly_terms = ['friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious']
+
+ if any(term in text for term in gentle_terms):
+ keywords['temperament_preference'].append('gentle')
+ if any(term in text for term in playful_terms):
+ keywords['temperament_preference'].append('playful')
+ if any(term in text for term in protective_terms):
+ keywords['temperament_preference'].append('protective')
+ if any(term in text for term in friendly_terms):
+ keywords['temperament_preference'].append('friendly')
+
+ # 經驗水平檢測
+ beginner_terms = ['first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced']
+ advanced_terms = ['experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned']
+
+ if any(term in text for term in beginner_terms):
+ keywords['experience_level'].append('beginner')
+ if any(term in text for term in advanced_terms):
+ keywords['experience_level'].append('advanced')
+
+ # 壽命偏好檢測
+ long_lived_terms = ['long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity']
+ healthy_terms = ['healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution']
+
+ if any(term in text for term in long_lived_terms):
+ keywords['lifespan_preference'].append('long')
+ if any(term in text for term in healthy_terms):
+ keywords['lifespan_preference'].append('healthy')
+
+ return keywords
+
+ def calculate_enhanced_matching_score(self, breed: str, breed_info: dict, user_description: str, base_similarity: float) -> dict:
+ """計算增強的匹配分數,基於用戶描述和品種特性"""
+ try:
+ user_desc = user_description.lower()
+
+ # 分析用戶需求
+ space_requirements = self._analyze_space_requirements(user_desc)
+ exercise_requirements = self._analyze_exercise_requirements(user_desc)
+ noise_requirements = self._analyze_noise_requirements(user_desc)
+ size_requirements = self._analyze_size_requirements(user_desc)
+ family_requirements = self._analyze_family_requirements(user_desc)
+
+ # 獲取品種特性
+ breed_size = breed_info.get('Size', '').lower()
+ breed_exercise = breed_info.get('Exercise Needs', '').lower()
+ breed_noise = breed_noise_info.get(breed, {}).get('noise_level', 'moderate').lower()
+ breed_temperament = breed_info.get('Temperament', '').lower()
+ breed_good_with_children = breed_info.get('Good with Children', '').lower()
+
+ # 計算各維度匹配分數
+ dimension_scores = {}
+
+ # 空間匹配 (30% 權重)
+ space_score = self._calculate_space_compatibility(space_requirements, breed_size, breed_exercise)
+ dimension_scores['space'] = space_score
+
+ # 運動需求匹配 (25% 權重)
+ exercise_score = self._calculate_exercise_compatibility(exercise_requirements, breed_exercise)
+ dimension_scores['exercise'] = exercise_score
+
+ # 噪音匹配 (20% 權重)
+ noise_score = self._calculate_noise_compatibility(noise_requirements, breed_noise)
+ dimension_scores['noise'] = noise_score
+
+ # 體型匹配 (15% 權重)
+ size_score = self._calculate_size_compatibility(size_requirements, breed_size)
+ dimension_scores['grooming'] = min(0.9, base_similarity + 0.1) # 美容需求基於語意相似度
+
+ # 家庭相容性 (10% 權重)
+ family_score = self._calculate_family_compatibility(family_requirements, breed_good_with_children, breed_temperament)
+ dimension_scores['family'] = family_score
+ dimension_scores['experience'] = min(0.9, base_similarity + 0.05) # 經驗需求基於語意相似度
+
+ # 應用硬約束過濾
+ constraint_penalty = self._apply_hard_constraints_enhanced(user_desc, breed_info)
+
+ # 計算加權總分 - 精確化維度權重配置
+ # 根據指導建議重新平衡維度權重
+ weighted_score = (
+ space_score * 0.30 + # 空間相容性(降低5%)
+ exercise_score * 0.28 + # 運動需求匹配(降低2%)
+ noise_score * 0.18 + # 噪音控制(提升3%)
+ family_score * 0.12 + # 家庭相容性(提升2%)
+ size_score * 0.08 + # 體型匹配(降低2%)
+ min(0.9, base_similarity + 0.1) * 0.04 # 護理需求(新增獨立權重)
+ )
+
+ # 優化完美匹配獎勵機制 - 降低觸發門檻並增加層次
+ perfect_match_bonus = 0.0
+ if space_score >= 0.88 and exercise_score >= 0.88 and noise_score >= 0.85:
+ perfect_match_bonus = 0.08 # 卓越匹配獎勵
+ elif space_score >= 0.82 and exercise_score >= 0.82 and noise_score >= 0.75:
+ perfect_match_bonus = 0.04 # 優秀匹配獎勵
+ elif space_score >= 0.75 and exercise_score >= 0.75:
+ perfect_match_bonus = 0.02 # 良好匹配獎勵
+
+ # 結合語意相似度與維度匹配 - 調整為75%維度匹配 25%語義相似度
+ base_combined_score = (weighted_score * 0.75 + base_similarity * 0.25) + perfect_match_bonus
+
+ # 應用漸進式約束懲罰,但確保基礎分數保障
+ raw_final_score = base_combined_score + constraint_penalty
+
+ # 實施動態分數保障機制 - 提升至40-42%基礎分數
+ # 根據品種特性動態調整基礎分數
+ base_guaranteed_score = 0.42 # 提升基礎保障分數
+
+ # 特殊品種基礎分數調整
+ high_adaptability_breeds = ['French_Bulldog', 'Pug', 'Golden_Retriever', 'Labrador_Retriever']
+ if any(breed in breed for breed in high_adaptability_breeds):
+ base_guaranteed_score = 0.45 # 高適應性品種更高基礎分數
+
+ # 動態分數分佈優化
+ if raw_final_score >= base_guaranteed_score:
+ # 對於高分品種,實施適度壓縮避免過度集中
+ if raw_final_score > 0.85:
+ compression_factor = 0.92 # 輕度壓縮高分
+ final_score = 0.85 + (raw_final_score - 0.85) * compression_factor
+ else:
+ final_score = raw_final_score
+ final_score = min(0.93, final_score) # 降低最高分數限制
+ else:
+ # 對於低分品種,使用改進的保障機制
+ normalized_raw_score = max(0.15, raw_final_score)
+ # 基礎保障75% + 實際計算25%,保持一定區分度
+ final_score = base_guaranteed_score * 0.75 + normalized_raw_score * 0.25
+ final_score = max(base_guaranteed_score, min(0.93, final_score))
+
+ lifestyle_bonus = max(0.0, weighted_score - base_similarity)
+
+ return {
+ 'final_score': final_score,
+ 'weighted_score': weighted_score,
+ 'lifestyle_bonus': lifestyle_bonus,
+ 'dimension_scores': dimension_scores,
+ 'constraint_penalty': constraint_penalty
+ }
+
+ except Exception as e:
+ print(f"Error in enhanced matching calculation for {breed}: {str(e)}")
+ return {
+ 'final_score': base_similarity,
+ 'weighted_score': base_similarity,
+ 'lifestyle_bonus': 0.0,
+ 'dimension_scores': {
+ 'space': base_similarity * 0.9,
+ 'exercise': base_similarity * 0.85,
+ 'grooming': base_similarity * 0.8,
+ 'experience': base_similarity * 0.75,
+ 'noise': base_similarity * 0.7,
+ 'family': base_similarity * 0.65
+ },
+ 'constraint_penalty': 0.0
+ }
+
+ def _analyze_space_requirements(self, user_desc: str) -> dict:
+ """分析空間需求 - 增強中等活動量識別"""
+ requirements = {'type': 'unknown', 'size': 'medium', 'importance': 0.5}
+
+ if any(word in user_desc for word in ['apartment', 'small apartment', 'small space', 'condo', 'flat']):
+ requirements['type'] = 'apartment'
+ requirements['size'] = 'small'
+ requirements['importance'] = 0.95 # 提高重要性
+ elif any(word in user_desc for word in ['medium-sized house', 'medium house', 'townhouse']):
+ requirements['type'] = 'medium_house'
+ requirements['size'] = 'medium'
+ requirements['importance'] = 0.8 # 中等活動量用戶的特殊標記
+ elif any(word in user_desc for word in ['large house', 'big house', 'yard', 'garden', 'large space', 'backyard']):
+ requirements['type'] = 'house'
+ requirements['size'] = 'large'
+ requirements['importance'] = 0.7
+
+ return requirements
+
+ def _analyze_exercise_requirements(self, user_desc: str) -> dict:
+ """分析運動需求 - 增強中等活動量識別"""
+ requirements = {'level': 'moderate', 'importance': 0.5}
+
+ # 低運動量識別
+ if any(word in user_desc for word in ["don't exercise", "don't exercise much", "low exercise", "minimal", "lazy", "not active"]):
+ requirements['level'] = 'low'
+ requirements['importance'] = 0.95
+ # 中等運動量的精確識別
+ elif any(phrase in user_desc for phrase in ['30 minutes', 'half hour', 'moderate', 'balanced', 'walk about']):
+ if 'walk' in user_desc or 'daily' in user_desc:
+ requirements['level'] = 'moderate'
+ requirements['importance'] = 0.85 # 中等活動量的特殊標記
+ # 高運動量識別
+ elif any(word in user_desc for word in ['active', 'hiking', 'outdoor activities', 'running', 'outdoors', 'love hiking']):
+ requirements['level'] = 'high'
+ requirements['importance'] = 0.9
+
+ return requirements
+
+ def _analyze_noise_requirements(self, user_desc: str) -> dict:
+ """分析噪音需求"""
+ requirements = {'tolerance': 'medium', 'importance': 0.5}
+
+ if any(word in user_desc for word in ['quiet', 'no bark', "won't bark", "doesn't bark", 'silent', 'peaceful']):
+ requirements['tolerance'] = 'low'
+ requirements['importance'] = 0.9
+ elif any(word in user_desc for word in ['loud', 'barking ok', 'noise ok']):
+ requirements['tolerance'] = 'high'
+ requirements['importance'] = 0.7
+
+ return requirements
+
+ def _analyze_size_requirements(self, user_desc: str) -> dict:
+ """分析體型需求"""
+ requirements = {'preferred': 'any', 'importance': 0.5}
+
+ if any(word in user_desc for word in ['small', 'tiny', 'little', 'lap dog', 'compact']):
+ requirements['preferred'] = 'small'
+ requirements['importance'] = 0.8
+ elif any(word in user_desc for word in ['large', 'big', 'giant']):
+ requirements['preferred'] = 'large'
+ requirements['importance'] = 0.8
+
+ return requirements
+
+ def _analyze_family_requirements(self, user_desc: str) -> dict:
+ """分析家庭需求"""
+ requirements = {'children': False, 'importance': 0.3}
+
+ if any(word in user_desc for word in ['children', 'kids', 'family', 'child']):
+ requirements['children'] = True
+ requirements['importance'] = 0.8
+
+ return requirements
+
+ def _calculate_space_compatibility(self, space_req: dict, breed_size: str, breed_exercise: str) -> float:
+ """計算空間相容性分數 - 增強中等活動量處理"""
+ if space_req['type'] == 'apartment':
+ if 'small' in breed_size or 'toy' in breed_size:
+ base_score = 0.95
+ elif 'medium' in breed_size:
+ if 'low' in breed_exercise:
+ base_score = 0.75
+ else:
+ base_score = 0.45 # 降低中型犬在公寓的分數
+ elif 'large' in breed_size:
+ base_score = 0.05 # 大型犬極度不適合公寓
+ elif 'giant' in breed_size:
+ base_score = 0.01 # 超大型犬完全不適合公寓
+ else:
+ base_score = 0.7
+ elif space_req['type'] == 'medium_house':
+ # 中型房屋的特殊處理 - 適合中等活動量用戶
+ if 'small' in breed_size or 'toy' in breed_size:
+ base_score = 0.9
+ elif 'medium' in breed_size:
+ base_score = 0.95 # 中型犬在中型房屋很適合
+ elif 'large' in breed_size:
+ if 'moderate' in breed_exercise or 'low' in breed_exercise:
+ base_score = 0.8 # 低運動量大型犬還可以
+ else:
+ base_score = 0.6 # 高運動量大型犬不太適合
+ elif 'giant' in breed_size:
+ base_score = 0.3 # 超大型犬在中型房屋不太適合
+ else:
+ base_score = 0.85
+ else:
+ # 大型房屋的情況
+ if 'small' in breed_size or 'toy' in breed_size:
+ base_score = 0.85
+ elif 'medium' in breed_size:
+ base_score = 0.9
+ elif 'large' in breed_size or 'giant' in breed_size:
+ base_score = 0.95
+ else:
+ base_score = 0.8
+
+ return min(0.95, base_score)
+
+ def _calculate_exercise_compatibility(self, exercise_req: dict, breed_exercise: str) -> float:
+ """計算運動需求相容性分數 - 增強中等活動量處理"""
+ if exercise_req['level'] == 'low':
+ if 'low' in breed_exercise or 'minimal' in breed_exercise:
+ return 0.95
+ elif 'moderate' in breed_exercise:
+ return 0.5 # 降低不匹配分數
+ elif 'high' in breed_exercise:
+ return 0.1 # 進一步降低高運動需求的匹配
+ else:
+ return 0.7
+ elif exercise_req['level'] == 'high':
+ if 'high' in breed_exercise:
+ return 0.95
+ elif 'moderate' in breed_exercise:
+ return 0.8
+ elif 'low' in breed_exercise:
+ return 0.6
+ else:
+ return 0.7
+ else: # moderate - 中等活動量的精確處理
+ if 'moderate' in breed_exercise:
+ return 0.95 # 完美匹配
+ elif 'low' in breed_exercise:
+ return 0.85 # 低運動需求的品種對中等活動量用戶也不錯
+ elif 'high' in breed_exercise:
+ return 0.5 # 中等活動量用戶不太適合高運動需求品種
+ else:
+ return 0.75
+
+ return 0.6
+
+ def _calculate_noise_compatibility(self, noise_req: dict, breed_noise: str) -> float:
+ """計算噪音相容性分數,更好處理複合等級"""
+ breed_noise_lower = breed_noise.lower()
+
+ if noise_req['tolerance'] == 'low':
+ if 'low' in breed_noise_lower and 'moderate' not in breed_noise_lower:
+ return 0.95 # 純低噪音
+ elif 'low-moderate' in breed_noise_lower or 'low to moderate' in breed_noise_lower:
+ return 0.8 # 低到中等噪音,還可接受
+ elif breed_noise_lower in ['moderate']:
+ return 0.4 # 中等噪音有些問題
+ elif 'high' in breed_noise_lower:
+ return 0.1 # 高噪音不適合
+ else:
+ return 0.6 # 未知噪音水平,保守估計
+ elif noise_req['tolerance'] == 'high':
+ if 'high' in breed_noise_lower:
+ return 0.9
+ elif 'moderate' in breed_noise_lower:
+ return 0.85
+ elif 'low' in breed_noise_lower:
+ return 0.8 # 安靜犬對高容忍度的人也很好
+ else:
+ return 0.8
+ else: # moderate tolerance
+ if 'moderate' in breed_noise_lower:
+ return 0.9
+ elif 'low' in breed_noise_lower:
+ return 0.85
+ elif 'high' in breed_noise_lower:
+ return 0.6
+ else:
+ return 0.75
+
+ return 0.7
+
+ def _calculate_size_compatibility(self, size_req: dict, breed_size: str) -> float:
+ """計算體型相容性分數"""
+ if size_req['preferred'] == 'small':
+ if any(word in breed_size for word in ['small', 'toy', 'tiny']):
+ return 0.9
+ elif 'medium' in breed_size:
+ return 0.6
+ else:
+ return 0.3
+ elif size_req['preferred'] == 'large':
+ if any(word in breed_size for word in ['large', 'giant']):
+ return 0.9
+ elif 'medium' in breed_size:
+ return 0.7
+ else:
+ return 0.4
+
+ return 0.7 # 無特別偏好
+
+ def _calculate_family_compatibility(self, family_req: dict, good_with_children: str, temperament: str) -> float:
+ """計算家庭相容性分數"""
+ if family_req['children']:
+ if 'yes' in good_with_children.lower():
+ return 0.9
+ elif any(word in temperament for word in ['gentle', 'patient', 'friendly']):
+ return 0.8
+ elif 'no' in good_with_children.lower():
+ return 0.2
+ else:
+ return 0.6
+
+ return 0.7
+
+    def _apply_hard_constraints_enhanced(self, user_desc: str, breed_info: dict) -> float:
+        """Compute a breed-aware score adjustment for hard lifestyle mismatches.
+
+        Scans the user's description for apartment, low-exercise, quiet,
+        small-size and moderate-activity cues and accumulates negative
+        penalties when the breed's size, exercise needs or noise level
+        conflict with them. Breeds listed as adaptable earn a small positive
+        bonus that offsets part of the penalty.
+
+        Args:
+            user_desc: Free-text user description. Matching is done on a
+                lower-cased copy.
+            breed_info: Breed record; the 'Breed', 'Size' and 'Exercise Needs'
+                keys are read, and missing or None values are treated as ''.
+
+        Returns:
+            The summed penalty plus bonus, floored at -0.4 so a single
+            constraint cannot dominate the score. The value can be positive
+            when only bonuses apply.
+        """
+        penalty = 0.0
+        # The first three constraint groups are softened by this factor.
+        penalty_decay_factor = 0.7
+        breed_adaptability_bonus = 0.0
+
+        user_desc = (user_desc or '').lower()
+        breed_size = (breed_info.get('Size') or '').lower()
+        breed_exercise = (breed_info.get('Exercise Needs') or '').lower()
+        # Underscored form: this is what the breed_noise_info lookup below uses.
+        breed_name = (breed_info.get('Breed') or '').replace(' ', '_')
+        breed_key = breed_name.lower()
+
+        def name_matches(candidates) -> bool:
+            """Substring-match candidates against the breed name, ignoring case and space/underscore differences."""
+            return any(c.lower().replace(' ', '_') in breed_key for c in candidates)
+
+        def mentions(phrases) -> bool:
+            """Return True when any phrase occurs in the user description."""
+            return any(phrase in user_desc for phrase in phrases)
+
+        # Apartment living versus breed size.
+        if 'apartment' in user_desc:
+            if 'giant' in breed_size:
+                # Comparatively calm giant breeds get partial compensation.
+                if name_matches(['Mastiff', 'Great Dane']):
+                    breed_adaptability_bonus += 0.08
+                penalty += -0.35 * penalty_decay_factor
+            elif 'large' in breed_size:
+                # Large breeds regarded as apartment friendly.
+                if name_matches(['Greyhound', 'Great Dane']):
+                    breed_adaptability_bonus += 0.06
+                penalty += -0.25 * penalty_decay_factor
+            elif 'medium' in breed_size and 'high' in breed_exercise:
+                penalty += -0.15 * penalty_decay_factor
+
+        # Low-activity owner versus exercise needs.
+        if mentions(["don't exercise", "not active", "low exercise"]):
+            if 'high' in breed_exercise:
+                # Sprinter-type breeds that are calm between bursts of activity.
+                if name_matches(['Greyhound', 'Whippet']):
+                    breed_adaptability_bonus += 0.10
+                penalty += -0.28 * penalty_decay_factor
+            elif 'moderate' in breed_exercise:
+                penalty += -0.08 * penalty_decay_factor
+
+        # Quiet requirement versus breed noise level.
+        if mentions(['quiet', "won't bark", "doesn't bark", 'silent']):
+            breed_noise = breed_noise_info.get(breed_name, {}).get('noise_level', 'moderate').lower()
+            if 'high' in breed_noise:
+                # Vocal but highly trainable breeds get partial compensation.
+                if name_matches(['German_Shepherd', 'Golden_Retriever']):
+                    breed_adaptability_bonus += 0.05
+                penalty += -0.18 * penalty_decay_factor
+            elif 'moderate' in breed_noise and 'low' not in breed_noise:
+                penalty += -0.05 * penalty_decay_factor
+
+        # Small-dog preference versus large or giant breeds.
+        if mentions(['small', 'tiny', 'little']):
+            if 'giant' in breed_size:
+                penalty -= 0.35
+            elif 'large' in breed_size:
+                penalty -= 0.20
+
+        # Moderate-activity owners: discourage, but do not exclude, giant and
+        # very high-energy breeds.
+        if mentions(['30 minutes', 'half hour', 'moderate', 'balanced', 'medium-sized house']):
+            giant_breeds = ['Saint Bernard', 'Tibetan Mastiff', 'Great Dane', 'Mastiff', 'Newfoundland']
+            is_named_giant = name_matches(giant_breeds)
+            if is_named_giant or 'giant' in breed_size:
+                penalty -= 0.35
+
+            # A medium-sized house plus a giant breed costs extra unless the
+            # user also describes an active lifestyle.
+            if 'medium-sized house' in user_desc and is_named_giant:
+                if not mentions(['hiking', 'running', 'active', 'outdoor activities']):
+                    penalty -= 0.15
+
+            # A 30-minute walk is not enough for very high-energy breeds.
+            if mentions(['30 minutes', 'half hour']) and 'walk' in user_desc:
+                high_energy_breeds = ['Siberian Husky', 'Border Collie', 'Jack Russell Terrier', 'Weimaraner']
+                if name_matches(high_energy_breeds) and 'high' in breed_exercise:
+                    penalty -= 0.25
+
+        # Borderline-fit breeds receive a flat bonus. Keys are compared
+        # case-insensitively because the table itself mixes casing styles.
+        boundary_adaptable_breeds = {
+            'italian_greyhound': 0.08,  # quiet, low-maintenance small dog
+            'boston_bull': 0.06,        # highly adaptable small dog
+            'havanese': 0.05,           # friendly, adaptable small dog
+            'silky_terrier': 0.04,      # quiet toy dog
+            'basset': 0.07              # low-energy but friendly medium dog
+        }
+        breed_adaptability_bonus += boundary_adaptable_breeds.get(breed_key, 0.0)
+
+        # Apply the compensation, then cap the total penalty.
+        final_penalty = penalty + breed_adaptability_bonus
+        return max(-0.4, final_penalty)
+
+    def get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]:
+        """Collect a breed's display characteristics into a flat dictionary.
+
+        Looks the breed up with get_dog_description and copies selected
+        fields under lowercase keys, substituting defaults for missing ones.
+        The noise level is read from breed_noise_info and defaults to
+        'moderate'.
+
+        Returns:
+            The characteristics dictionary, or {} when no description is
+            available for the breed.
+        """
+        info = get_dog_description(breed)
+        if not info:
+            return {}
+
+        # (output key, source key, default when the source key is missing)
+        field_spec = (
+            ('size', 'Size', 'Unknown'),
+            ('temperament', 'Temperament', ''),
+            ('exercise_needs', 'Exercise Needs', 'Moderate'),
+            ('grooming_needs', 'Grooming Needs', 'Moderate'),
+            ('good_with_children', 'Good with Children', 'Unknown'),
+            ('lifespan', 'Lifespan', '10-12 years'),
+            ('description', 'Description', ''),
+        )
+        traits = {
+            out_key: info.get(src_key, fallback)
+            for out_key, src_key, fallback in field_spec
+        }
+
+        # Noise data lives in a separate table keyed by the breed name.
+        traits['noise_level'] = breed_noise_info.get(breed, {}).get('noise_level', 'moderate')
+        return traits
+
+    def get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]:
+        """Convert a standardized breed record into the dictionary format.
+
+        Reads the size_category, exercise_level, care_complexity and
+        child_compatibility attributes of standardized_info and maps the
+        numeric codes to their text labels.
+
+        Returns:
+            A dictionary with the 'Size', 'Exercise Needs', 'Grooming Needs',
+            'Good with Children', 'Temperament', 'Lifespan' and 'Description'
+            keys, or {} when the conversion raises (the error is printed).
+        """
+        size_labels = {1: 'Tiny', 2: 'Small', 3: 'Medium', 4: 'Large', 5: 'Giant'}
+        exercise_labels = {1: 'Low', 2: 'Moderate', 3: 'High', 4: 'Very High'}
+        care_labels = {1: 'Low', 2: 'Moderate', 3: 'High'}
+
+        try:
+            size = size_labels.get(standardized_info.size_category, 'Medium')
+            exercise = exercise_labels.get(standardized_info.exercise_level, 'Moderate')
+            grooming = care_labels.get(standardized_info.care_complexity, 'Moderate')
+
+            # Scores at the extremes map to a firm Yes/No; the rest is Unknown.
+            if standardized_info.child_compatibility >= 0.8:
+                with_children = 'Yes'
+            elif standardized_info.child_compatibility <= 0.2:
+                with_children = 'No'
+            else:
+                with_children = 'Unknown'
+
+            # The description uses a lowercase fallback, unlike the 'Size' field.
+            size_for_text = size_labels.get(standardized_info.size_category, "medium")
+
+            converted = {}
+            converted['Size'] = size
+            converted['Exercise Needs'] = exercise
+            converted['Grooming Needs'] = grooming
+            converted['Good with Children'] = with_children
+            converted['Temperament'] = 'Varies by individual'
+            converted['Lifespan'] = '10-12 years'
+            converted['Description'] = f'A {size_for_text} sized breed'
+            return converted
+        except Exception as e:
+            print(f"Error converting standardized info: {str(e)}")
+            return {}
+
+    def get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]:
+        """Build backup recommendations for when the enhanced system fails.
+
+        Takes the first top_k entries of a fixed (breed, score) list and
+        fills each with details from get_dog_description, substituting
+        defaults for missing fields.
+
+        Returns:
+            A list of recommendation dictionaries ranked from 1, each tagged
+            with search_type 'fallback', or [] when anything raises (the
+            error is printed).
+        """
+        try:
+            # Fixed (breed, score) pairs in descending score order.
+            safe_breeds = [
+                ('Labrador Retriever', 0.85),
+                ('Golden Retriever', 0.82),
+                ('Cavalier King Charles Spaniel', 0.80),
+                ('French Bulldog', 0.78),
+                ('Boston Terrier', 0.76),
+                ('Bichon Frise', 0.74),
+                ('Pug', 0.72),
+                ('Cocker Spaniel', 0.70)
+            ]
+
+            picks = []
+            for rank, (breed, score) in enumerate(safe_breeds[:top_k], start=1):
+                # The description lookup is done with the underscored name.
+                details = get_dog_description(breed.replace(' ', '_')) or {}
+                picks.append({
+                    'breed': breed,
+                    'rank': rank,
+                    'overall_score': score,
+                    'final_score': score,
+                    'semantic_score': score * 0.8,
+                    'comparative_bonus': 0.0,
+                    'lifestyle_bonus': 0.0,
+                    'size': details.get('Size', 'Unknown'),
+                    'temperament': details.get('Temperament', ''),
+                    'exercise_needs': details.get('Exercise Needs', 'Moderate'),
+                    'grooming_needs': details.get('Grooming Needs', 'Moderate'),
+                    'good_with_children': details.get('Good with Children', 'Yes'),
+                    'lifespan': details.get('Lifespan', '10-12 years'),
+                    'description': details.get('Description', ''),
+                    'search_type': 'fallback'
+                })
+
+            return picks
+
+        except Exception as e:
+            print(f"Error generating fallback recommendations: {str(e)}")
+            return []
diff --git a/recommendation_formatter.py b/recommendation_formatter.py
index 5d845161be3df8ba159495c3d1a8b1e307defcd1..527e9a00e304c2239e187c679bba9a9d63487f85 100644
--- a/recommendation_formatter.py
+++ b/recommendation_formatter.py
@@ -28,6 +28,10 @@ def get_breed_recommendations(user_prefs: UserPreferences, top_n: int = 15) -> L
breed = breed_tuple[0]
base_breed = breed.split('(')[0].strip()
+ # 過濾掉野生動物品種
+ if base_breed == 'Dhole':
+ continue
+
if base_breed in seen_breeds:
continue
seen_breeds.add(base_breed)
@@ -127,7 +131,7 @@ def get_breed_recommendations(user_prefs: UserPreferences, top_n: int = 15) -> L
print(f"Breeds after filtering: {len(recommendations)}")
- # 嚴格按照 final_score 排序
+ # 按照 final_score 排序
recommendations.sort(key=lambda x: (round(-x['final_score'], 4), x['breed']))
# 修正後的推薦選擇邏輯,移除有問題的分數比較
diff --git a/semantic_breed_recommender.py b/semantic_breed_recommender.py
index 8e4acf3482615c7460d861d7871198ad3468cbfc..46e992e33d1ae829c767943900df9b8051c5f952 100644
--- a/semantic_breed_recommender.py
+++ b/semantic_breed_recommender.py
@@ -18,722 +18,134 @@ from constraint_manager import ConstraintManager, apply_breed_constraints
from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore
from score_calibrator import ScoreCalibrator, calibrate_breed_scores
from config_manager import get_config_manager, get_standardized_breed_data
-
-@dataclass
-class BreedDescriptionVector:
- """Data structure for breed description vectorization"""
- breed_name: str
- description_text: str
- embedding: np.ndarray
- characteristics: Dict[str, Any]
+from semantic_vector_manager import SemanticVectorManager, BreedDescriptionVector
+from user_query_analyzer import UserQueryAnalyzer
+from matching_score_calculator import MatchingScoreCalculator
class SemanticBreedRecommender:
"""
- Enhanced SBERT-based semantic breed recommendation system
- Provides multi-dimensional natural language understanding for dog breed recommendations
+ 增強的基於 SBERT 的語義品種推薦系統 (Facade Pattern)
+ 為狗品種推薦提供多維度自然語言理解
"""
def __init__(self):
- """Initialize the semantic recommender"""
- self.model_name = 'all-MiniLM-L6-v2' # Efficient SBERT model
- self.sbert_model = None
- self._sbert_loading_attempted = False
- self.breed_vectors = {}
- self.breed_list = self._get_breed_list()
- self.comparative_keywords = {
- 'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8,
- 'then': 0.7, 'second': 0.7, 'followed': 0.6,
- 'third': 0.5, 'least': 0.3, 'dislike': 0.2
- }
- # Defer SBERT model loading until needed in GPU context
- # This prevents CUDA initialization issues in ZeroGPU environment
- print("SemanticBreedRecommender initialized (SBERT loading deferred)")
-
- # Initialize multi-head scorer with SBERT model if enhanced mode is enabled
- # if self.sbert_model:
- # self.multi_head_scorer = MultiHeadScorer(self.sbert_model)
- # print("Multi-head scorer initialized with SBERT model")
-
- def _get_breed_list(self) -> List[str]:
- """Get breed list from database"""
- try:
- conn = sqlite3.connect('animal_detector.db')
- cursor = conn.cursor()
- cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog")
- breeds = [row[0] for row in cursor.fetchall()]
- cursor.close()
- conn.close()
- return breeds
- except Exception as e:
- print(f"Error getting breed list: {str(e)}")
- # Backup breed list for Google Colab environment
- return ['Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever',
- 'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier']
-
- def _initialize_model(self):
- """Initialize SBERT model with fallback - designed for ZeroGPU compatibility"""
- if self.sbert_model is not None or self._sbert_loading_attempted:
- return self.sbert_model
-
- try:
- print("Loading SBERT model in GPU context...")
- # Try different model names if the primary one fails
- model_options = ['all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'all-MiniLM-L12-v2']
-
- for model_name in model_options:
- try:
- # Specify device explicitly to handle ZeroGPU environment
- import torch
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
- self.sbert_model = SentenceTransformer(model_name, device=device)
- self.model_name = model_name
- print(f"SBERT model {model_name} loaded successfully on {device}")
- return self.sbert_model
- except Exception as model_e:
- print(f"Failed to load {model_name}: {str(model_e)}")
- continue
-
- # If all models fail
- print("All SBERT models failed to load. Using basic text matching fallback.")
- self.sbert_model = None
- return None
-
- except Exception as e:
- print(f"Failed to initialize any SBERT model: {str(e)}")
- print(traceback.format_exc())
- print("Will provide basic text-based recommendations without embeddings")
- self.sbert_model = None
- return None
- finally:
- self._sbert_loading_attempted = True
-
- def _create_breed_description(self, breed: str) -> str:
- """Create comprehensive natural language description for breed with all key characteristics"""
- try:
- # Get all information sources
- breed_info = get_dog_description(breed) or {}
- health_info = breed_health_info.get(breed, {}) if breed_health_info else {}
- noise_info = breed_noise_info.get(breed, {}) if breed_noise_info else {}
-
- breed_display_name = breed.replace('_', ' ')
- description_parts = []
-
- # 1. Basic size and physical characteristics
- size = breed_info.get('Size', 'medium').lower()
- description_parts.append(f"{breed_display_name} is a {size} sized dog breed")
-
- # 2. Temperament and personality (critical for matching)
- temperament = breed_info.get('Temperament', '')
- if temperament:
- description_parts.append(f"with a {temperament.lower()} temperament")
-
- # 3. Exercise and activity level (critical for apartment living)
- exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower()
- if 'high' in exercise_needs or 'very high' in exercise_needs:
- description_parts.append("requiring high daily exercise and mental stimulation")
- elif 'low' in exercise_needs or 'minimal' in exercise_needs:
- description_parts.append("with minimal exercise requirements, suitable for apartment living")
- else:
- description_parts.append("with moderate exercise needs")
-
- # 4. Noise characteristics (critical for quiet requirements)
- noise_level = noise_info.get('noise_level', 'moderate').lower()
- if 'low' in noise_level or 'quiet' in noise_level:
- description_parts.append("known for being quiet and rarely barking")
- elif 'high' in noise_level or 'loud' in noise_level:
- description_parts.append("tends to be vocal and bark frequently")
- else:
- description_parts.append("with moderate barking tendencies")
-
- # 5. Living space compatibility
- if size in ['small', 'tiny']:
- description_parts.append("excellent for small apartments and limited spaces")
- elif size in ['large', 'giant']:
- description_parts.append("requiring large living spaces and preferably a yard")
- else:
- description_parts.append("adaptable to various living situations")
-
- # 6. Grooming and maintenance
- grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower()
- if 'high' in grooming_needs:
- description_parts.append("requiring regular professional grooming")
- elif 'low' in grooming_needs:
- description_parts.append("with minimal grooming requirements")
- else:
- description_parts.append("with moderate grooming needs")
+ """初始化語義品種推薦器"""
+ # 初始化語義向量管理器
+ self.vector_manager = SemanticVectorManager()
- # 7. Family compatibility
- good_with_children = breed_info.get('Good with Children', 'Yes')
- if good_with_children == 'Yes':
- description_parts.append("excellent with children and families")
- else:
- description_parts.append("better suited for adult households")
-
- # 8. Intelligence and trainability (from database description)
- intelligence_keywords = []
- description_text = breed_info.get('Description', '').lower()
-
- if description_text:
- # Extract intelligence indicators from description
- if any(word in description_text for word in ['intelligent', 'smart', 'clever', 'quick to learn']):
- intelligence_keywords.extend(['highly intelligent', 'trainable', 'quick learner'])
- elif any(word in description_text for word in ['stubborn', 'independent', 'difficult to train']):
- intelligence_keywords.extend(['independent minded', 'requires patience', 'challenging to train'])
- else:
- intelligence_keywords.extend(['moderate intelligence', 'trainable with consistency'])
-
- # Extract working/purpose traits from description
- if any(word in description_text for word in ['working', 'herding', 'guard', 'hunting']):
- intelligence_keywords.extend(['working breed', 'purpose-driven', 'task-oriented'])
- elif any(word in description_text for word in ['companion', 'lap', 'toy', 'decorative']):
- intelligence_keywords.extend(['companion breed', 'affectionate', 'people-focused'])
-
- # Add intelligence context to description
- if intelligence_keywords:
- description_parts.append(f"characterized as {', '.join(intelligence_keywords[:2])}")
-
- # 9. Special characteristics and purpose (enhanced with database mining)
- if breed_info.get('Description'):
- desc = breed_info.get('Description', '')[:150] # Increased to 150 chars for more context
- if desc:
- # Extract key traits from description for better semantic matching
- desc_lower = desc.lower()
- key_traits = []
-
- # Extract key behavioral traits from description
- if 'friendly' in desc_lower:
- key_traits.append('friendly')
- if 'gentle' in desc_lower:
- key_traits.append('gentle')
- if 'energetic' in desc_lower or 'active' in desc_lower:
- key_traits.append('energetic')
- if 'calm' in desc_lower or 'peaceful' in desc_lower:
- key_traits.append('calm')
- if 'protective' in desc_lower or 'guard' in desc_lower:
- key_traits.append('protective')
-
- trait_text = f" and {', '.join(key_traits)}" if key_traits else ""
- description_parts.append(f"Known for: {desc.lower()}{trait_text}")
-
- # 9. Care level requirements
- try:
- care_level = breed_info.get('Care Level', 'moderate')
- if isinstance(care_level, str):
- description_parts.append(f"requiring {care_level.lower()} overall care level")
- else:
- description_parts.append("requiring moderate overall care level")
- except Exception as e:
- print(f"Error processing care level for {breed}: {str(e)}")
- description_parts.append("requiring moderate overall care level")
-
- # 10. Lifespan information
- try:
- lifespan = breed_info.get('Lifespan', '10-12 years')
- if lifespan and isinstance(lifespan, str) and lifespan.strip():
- description_parts.append(f"with a typical lifespan of {lifespan}")
- else:
- description_parts.append("with a typical lifespan of 10-12 years")
- except Exception as e:
- print(f"Error processing lifespan for {breed}: {str(e)}")
- description_parts.append("with a typical lifespan of 10-12 years")
-
- # Create comprehensive description
- full_description = '. '.join(description_parts) + '.'
-
- # Add comprehensive keywords for better semantic matching
- keywords = []
-
- # Basic breed name keywords
- keywords.extend([word.lower() for word in breed_display_name.split()])
-
- # Temperament keywords
- if temperament:
- keywords.extend([word.lower().strip(',') for word in temperament.split()])
-
- # Size-based keywords
- if 'small' in size or 'tiny' in size:
- keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap'])
- elif 'large' in size or 'giant' in size:
- keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor'])
- else:
- keywords.extend(['medium', 'moderate', 'average', 'balanced'])
+ # 初始化用戶查詢分析器
+ self.query_analyzer = UserQueryAnalyzer(self.vector_manager.get_breed_list())
- # Activity level keywords
- exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower()
- if 'high' in exercise_needs:
- keywords.extend(['active', 'energetic', 'exercise', 'outdoor', 'hiking', 'running', 'athletic'])
- elif 'low' in exercise_needs:
- keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary'])
- else:
- keywords.extend(['moderate', 'balanced', 'walks', 'regular'])
+ # 初始化匹配評分計算器
+ self.score_calculator = MatchingScoreCalculator(self.vector_manager.get_breed_list())
- # Noise level keywords
- noise_level = noise_info.get('noise_level', 'moderate').lower()
- if 'quiet' in noise_level or 'low' in noise_level:
- keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise'])
- elif 'high' in noise_level or 'loud' in noise_level:
- keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog'])
-
- # Living situation keywords
- if size in ['small', 'tiny'] and 'low' in exercise_needs:
- keywords.extend(['apartment', 'city', 'urban', 'small-space'])
- if size in ['large', 'giant'] or 'high' in exercise_needs:
- keywords.extend(['house', 'yard', 'suburban', 'rural', 'space'])
-
- # Family keywords
- good_with_children = breed_info.get('Good with Children', 'Yes')
- if good_with_children == 'Yes':
- keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle'])
-
- # Intelligence and trainability keywords (from database description mining)
- if intelligence_keywords:
- keywords.extend([word.lower() for phrase in intelligence_keywords for word in phrase.split()])
-
- # Grooming-based keywords (enhanced)
- grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower()
- if 'high' in grooming_needs:
- keywords.extend(['high-maintenance', 'professional-grooming', 'daily-brushing', 'coat-care'])
- elif 'low' in grooming_needs:
- keywords.extend(['low-maintenance', 'minimal-grooming', 'easy-care', 'wash-and-go'])
- else:
- keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care'])
-
- # Lifespan-based keywords
- lifespan = breed_info.get('Lifespan', '10-12 years')
- if lifespan and isinstance(lifespan, str):
- try:
- # Extract years from lifespan string (e.g., "10-12 years" or "12-15 years")
- import re
- years = re.findall(r'\d+', lifespan)
- if years:
- avg_years = sum(int(y) for y in years) / len(years)
- if avg_years >= 14:
- keywords.extend(['long-lived', 'longevity', 'durable', 'healthy-lifespan'])
- elif avg_years <= 8:
- keywords.extend(['shorter-lifespan', 'health-considerations', 'special-care'])
- else:
- keywords.extend(['average-lifespan', 'moderate-longevity'])
- except:
- keywords.extend(['average-lifespan'])
-
- # Add keywords to description for better semantic matching
- unique_keywords = list(set(keywords))
- keyword_text = ' '.join(unique_keywords)
- full_description += f" Additional context: {keyword_text}"
-
- return full_description
+ # 保留原有屬性以維持向後兼容性
+ self.model_name = self.vector_manager.model_name
+ self.sbert_model = self.vector_manager.get_sbert_model()
+ self.breed_vectors = self.vector_manager.get_breed_vectors()
+ self.breed_list = self.vector_manager.get_breed_list()
+ self.comparative_keywords = self.query_analyzer.comparative_keywords
- except Exception as e:
- print(f"Error creating description for {breed}: {str(e)}")
- return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics."
-
- def _build_breed_vectors(self):
- """Build vector representations for all breeds - called lazily when needed"""
+ # 初始化增強系統組件(如果可用)
try:
- print("Building breed vector database...")
-
- # Initialize model if not already done
- if self.sbert_model is None:
- self._initialize_model()
-
- # Skip if model is not available
- if self.sbert_model is None:
- print("SBERT model not available, skipping vector building")
- return
-
- for breed in self.breed_list:
- description = self._create_breed_description(breed)
-
- # Generate embedding vector
- embedding = self.sbert_model.encode(description, convert_to_tensor=False)
-
- # Get breed characteristics
- breed_info = get_dog_description(breed)
- characteristics = {
- 'size': breed_info.get('Size', 'Medium') if breed_info else 'Medium',
- 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate') if breed_info else 'Moderate',
- 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate') if breed_info else 'Moderate',
- 'good_with_children': breed_info.get('Good with Children', 'Yes') if breed_info else 'Yes',
- 'temperament': breed_info.get('Temperament', '') if breed_info else ''
- }
-
- self.breed_vectors[breed] = BreedDescriptionVector(
- breed_name=breed,
- description_text=description,
- embedding=embedding,
- characteristics=characteristics
- )
-
- print(f"Successfully built {len(self.breed_vectors)} breed vectors")
-
- except Exception as e:
- print(f"Error building breed vectors: {str(e)}")
- print(traceback.format_exc())
- raise
+ self.query_engine = QueryUnderstandingEngine()
+ self.constraint_manager = ConstraintManager()
+ self.multi_head_scorer = None
+ self.score_calibrator = ScoreCalibrator()
+ self.config_manager = get_config_manager()
+
+ # 如果 SBERT 模型可用,初始化多頭評分器
+ if self.sbert_model:
+ self.multi_head_scorer = MultiHeadScorer(self.sbert_model)
+ print("Multi-head scorer initialized with SBERT model")
+ except ImportError:
+ print("Enhanced system components not available, using basic functionality")
+ self.query_engine = None
+ self.constraint_manager = None
+ self.multi_head_scorer = None
+ self.score_calibrator = None
+ self.config_manager = None
def _parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
- """Parse comparative preference expressions"""
- breed_scores = {}
-
- # Normalize input
- text = user_input.lower()
-
- # Find breed names and preference keywords
- for breed in self.breed_list:
- breed_display = breed.replace('_', ' ').lower()
- breed_words = breed_display.split()
-
- # Check if this breed is mentioned
- breed_mentioned = False
- for word in breed_words:
- if word in text:
- breed_mentioned = True
- break
-
- if breed_mentioned:
- # Find nearby preference keywords
- breed_score = 0.5 # Default score
-
- # Look for keywords within 50 characters of breed name
- breed_pos = text.find(breed_words[0])
- if breed_pos != -1:
- # Check for keywords in context
- context_start = max(0, breed_pos - 50)
- context_end = min(len(text), breed_pos + 50)
- context = text[context_start:context_end]
-
- for keyword, score in self.comparative_keywords.items():
- if keyword in context:
- breed_score = max(breed_score, score)
-
- breed_scores[breed] = breed_score
-
- return breed_scores
+ """解析比較性偏好表達"""
+ return self.query_analyzer.parse_comparative_preferences(user_input)
def _extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
- """Enhanced lifestyle keyword extraction with better pattern matching"""
- keywords = {
- 'living_space': [],
- 'activity_level': [],
- 'family_situation': [],
- 'noise_preference': [],
- 'size_preference': [],
- 'care_level': [],
- 'special_needs': [],
- 'intelligence_preference': [],
- 'grooming_preference': [],
- 'lifespan_preference': [],
- 'temperament_preference': [],
- 'experience_level': []
- }
-
- text = user_input.lower()
-
- # Enhanced living space detection
- apartment_terms = ['apartment', 'flat', 'condo', 'small space', 'city living', 'urban', 'no yard', 'indoor']
- house_terms = ['house', 'yard', 'garden', 'backyard', 'large space', 'suburban', 'rural', 'farm']
-
- if any(term in text for term in apartment_terms):
- keywords['living_space'].append('apartment')
- if any(term in text for term in house_terms):
- keywords['living_space'].append('house')
-
- # Enhanced activity level detection
- high_activity = ['active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor', 'sports', 'jogging',
- 'athletic', 'adventure', 'vigorous', 'high energy', 'workout']
- low_activity = ['calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed',
- 'peaceful', 'quiet lifestyle', 'minimal exercise']
- moderate_activity = ['moderate', 'walk', 'daily walks', 'light exercise']
-
- if any(term in text for term in high_activity):
- keywords['activity_level'].append('high')
- if any(term in text for term in low_activity):
- keywords['activity_level'].append('low')
- if any(term in text for term in moderate_activity):
- keywords['activity_level'].append('moderate')
-
- # Enhanced family situation detection
- children_terms = ['children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage', 'school age']
- elderly_terms = ['elderly', 'senior', 'old', 'retirement', 'aged', 'mature']
- single_terms = ['single', 'alone', 'individual', 'solo', 'myself']
-
- if any(term in text for term in children_terms):
- keywords['family_situation'].append('children')
- if any(term in text for term in elderly_terms):
- keywords['family_situation'].append('elderly')
- if any(term in text for term in single_terms):
- keywords['family_situation'].append('single')
-
- # Enhanced noise preference detection
- quiet_terms = ['quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise',
- 'soft-spoken', 'calm', 'tranquil']
- noise_ok_terms = ['loud', 'barking ok', 'noise tolerant', 'vocal', 'doesn\'t matter']
-
- if any(term in text for term in quiet_terms):
- keywords['noise_preference'].append('low')
- if any(term in text for term in noise_ok_terms):
- keywords['noise_preference'].append('high')
-
- # Enhanced size preference detection
- small_terms = ['small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog']
- large_terms = ['large', 'big', 'giant', 'huge', 'massive', 'great']
- medium_terms = ['medium', 'moderate size', 'average', 'mid-sized']
-
- if any(term in text for term in small_terms):
- keywords['size_preference'].append('small')
- if any(term in text for term in large_terms):
- keywords['size_preference'].append('large')
- if any(term in text for term in medium_terms):
- keywords['size_preference'].append('medium')
-
- # Enhanced care level detection
- low_care = ['low maintenance', 'easy care', 'simple', 'minimal grooming', 'wash and go']
- high_care = ['high maintenance', 'grooming', 'care intensive', 'professional grooming', 'daily brushing']
-
- if any(term in text for term in low_care):
- keywords['care_level'].append('low')
- if any(term in text for term in high_care):
- keywords['care_level'].append('high')
-
- # Intelligence preference detection (NEW)
- smart_terms = ['smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train', 'trainable', 'genius', 'brilliant']
- independent_terms = ['independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves']
-
- if any(term in text for term in smart_terms):
- keywords['intelligence_preference'].append('high')
- if any(term in text for term in independent_terms):
- keywords['intelligence_preference'].append('independent')
-
- # Grooming preference detection (NEW)
- low_grooming_terms = ['low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat']
- high_grooming_terms = ['high grooming', 'professional grooming', 'lots of care', 'high maintenance coat', 'daily brushing', 'regular grooming']
-
- if any(term in text for term in low_grooming_terms):
- keywords['grooming_preference'].append('low')
- if any(term in text for term in high_grooming_terms):
- keywords['grooming_preference'].append('high')
-
- # Lifespan preference detection (NEW)
- long_lived_terms = ['long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity']
- healthy_terms = ['healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution']
-
- if any(term in text for term in long_lived_terms):
- keywords['lifespan_preference'].append('long')
- if any(term in text for term in healthy_terms):
- keywords['lifespan_preference'].append('healthy')
-
- # Temperament preference detection (NEW)
- gentle_terms = ['gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile']
- playful_terms = ['playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy']
- protective_terms = ['protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive']
- friendly_terms = ['friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious']
-
- if any(term in text for term in gentle_terms):
- keywords['temperament_preference'].append('gentle')
- if any(term in text for term in playful_terms):
- keywords['temperament_preference'].append('playful')
- if any(term in text for term in protective_terms):
- keywords['temperament_preference'].append('protective')
- if any(term in text for term in friendly_terms):
- keywords['temperament_preference'].append('friendly')
-
- # Experience level detection (NEW)
- beginner_terms = ['first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced']
- advanced_terms = ['experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned']
-
- if any(term in text for term in beginner_terms):
- keywords['experience_level'].append('beginner')
- if any(term in text for term in advanced_terms):
- keywords['experience_level'].append('advanced')
-
- # Enhanced special needs detection
- guard_terms = ['guard', 'protection', 'security', 'watchdog', 'protective', 'defender']
- companion_terms = ['therapy', 'emotional support', 'companion', 'comfort', 'lap dog', 'cuddly']
- hypoallergenic_terms = ['hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly', 'no shed']
- multi_pet_terms = ['good with cats', 'cat friendly', 'multi-pet', 'other animals']
-
- if any(term in text for term in guard_terms):
- keywords['special_needs'].append('guard')
- if any(term in text for term in companion_terms):
- keywords['special_needs'].append('companion')
- if any(term in text for term in hypoallergenic_terms):
- keywords['special_needs'].append('hypoallergenic')
- if any(term in text for term in multi_pet_terms):
- keywords['special_needs'].append('multi_pet')
-
- return keywords
+ """增強的生活方式關鍵字提取,具有更好的模式匹配"""
+ return self.query_analyzer.extract_lifestyle_keywords(user_input)
def _apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]:
- """Apply size distribution correction to prevent large breed bias"""
- if len(recommendations) < 10:
- return recommendations
-
- # Analyze size distribution
- size_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0}
-
- for rec in recommendations:
- breed_info = get_dog_description(rec['breed'])
- if breed_info:
- size = self._normalize_breed_size(breed_info.get('Size', 'Medium'))
- size_counts[size] += 1
-
- total_recs = len(recommendations)
- large_giant_ratio = (size_counts['large'] + size_counts['giant']) / total_recs
-
- # If more than 70% are large/giant breeds, apply correction
- if large_giant_ratio > 0.7:
- corrected_recommendations = []
- size_quotas = {'toy': 2, 'small': 4, 'medium': 6, 'large': 2, 'giant': 1}
- current_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0}
-
- # First pass: add breeds within quotas
- for rec in recommendations:
- breed_info = get_dog_description(rec['breed'])
- if breed_info:
- size = self._normalize_breed_size(breed_info.get('Size', 'Medium'))
- if current_counts[size] < size_quotas[size]:
- corrected_recommendations.append(rec)
- current_counts[size] += 1
-
- # Second pass: fill remaining slots with best remaining candidates
- remaining_slots = 15 - len(corrected_recommendations)
- remaining_breeds = [rec for rec in recommendations if rec not in corrected_recommendations]
-
- corrected_recommendations.extend(remaining_breeds[:remaining_slots])
- return corrected_recommendations
-
- return recommendations
+ """應用尺寸分佈修正以防止大型品種偏差"""
+ return self.score_calculator.apply_size_distribution_correction(recommendations)
def _normalize_breed_size(self, size: str) -> str:
- """Normalize breed size to standard categories"""
- if not isinstance(size, str):
- return 'medium'
-
- size_lower = size.lower()
- if any(term in size_lower for term in ['toy', 'tiny']):
- return 'toy'
- elif 'small' in size_lower:
- return 'small'
- elif 'medium' in size_lower:
- return 'medium'
- elif 'large' in size_lower:
- return 'large'
- elif any(term in size_lower for term in ['giant', 'extra large']):
- return 'giant'
- else:
- return 'medium'
+ """標準化品種尺寸到標準分類"""
+ return self.score_calculator._normalize_breed_size(size)
def _parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
- """Parse user requirements more accurately"""
- requirements = {
- 'living_space': None,
- 'exercise_level': None,
- 'preferred_size': None,
- 'noise_tolerance': None
- }
-
- input_lower = user_input.lower()
-
- # Living space detection
- if 'apartment' in input_lower or 'small' in input_lower:
- requirements['living_space'] = 'apartment'
- elif 'large house' in input_lower or 'big' in input_lower:
- requirements['living_space'] = 'large_house'
- elif 'medium' in input_lower:
- requirements['living_space'] = 'medium_house'
-
- # Exercise level detection
- if "don't exercise" in input_lower or 'low exercise' in input_lower:
- requirements['exercise_level'] = 'low'
- elif any(term in input_lower for term in ['hiking', 'running', 'active']):
- requirements['exercise_level'] = 'high'
- elif '30 minutes' in input_lower or 'moderate' in input_lower:
- requirements['exercise_level'] = 'moderate'
-
- # Size preference detection
- if any(term in input_lower for term in ['small dog', 'tiny', 'toy']):
- requirements['preferred_size'] = 'small'
- elif any(term in input_lower for term in ['large dog', 'big dog']):
- requirements['preferred_size'] = 'large'
- elif 'medium' in input_lower:
- requirements['preferred_size'] = 'medium'
-
- return requirements
+ """更準確地解析用戶需求"""
+ return self.query_analyzer.parse_user_requirements(user_input)
def _apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float:
- """Enhanced hard constraints with stricter penalties"""
- penalty = 0.0
- user_text_lower = user_input.lower()
+ """增強硬約束,具有更嚴格的懲罰"""
+ return self.score_calculator.apply_hard_constraints(breed, user_input, breed_characteristics)
- # Get breed information
- breed_info = get_dog_description(breed)
- if not breed_info:
- return 0.0
-
- breed_size = breed_info.get('Size', '').lower()
- exercise_needs = breed_info.get('Exercise Needs', '').lower()
-
- # Apartment living constraints - MUCH STRICTER
- if any(term in user_text_lower for term in ['apartment', 'flat', 'studio', 'small space']):
- if 'giant' in breed_size:
- return -2.0 # Complete elimination
- elif 'large' in breed_size:
- if any(term in exercise_needs for term in ['high', 'very high']):
- return -2.0 # Complete elimination
- else:
- penalty -= 0.5 # Still significant penalty
- elif 'medium' in breed_size and 'very high' in exercise_needs:
- penalty -= 0.6
-
- # Exercise mismatch constraints
- if "don't exercise much" in user_text_lower or "low exercise" in user_text_lower:
- if any(term in exercise_needs for term in ['very high', 'extreme', 'intense']):
- return -2.0 # Complete elimination
- elif 'high' in exercise_needs:
- penalty -= 0.8
-
- # Moderate lifestyle detection
- if any(term in user_text_lower for term in ['moderate', 'balanced', '30 minutes', 'half hour']):
- # Penalize extremes
- if 'giant' in breed_size:
- penalty -= 0.7 # Strong penalty for giants
- elif 'very high' in exercise_needs:
- penalty -= 0.5
-
- # Children safety (existing logic remains but enhanced)
- if any(term in user_text_lower for term in ['child', 'kids', 'family', 'baby']):
- good_with_children = breed_info.get('Good with Children', '').lower()
- if good_with_children == 'no':
- return -2.0 # Complete elimination for safety
-
- return penalty
+ def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any],
+ lifestyle_keywords: Dict[str, List[str]]) -> float:
+ """增強生活方式匹配獎勵計算"""
+ return self.score_calculator.calculate_lifestyle_bonus(breed_characteristics, lifestyle_keywords)
+
+ def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]:
+ """基於增強關鍵字提取和數據庫挖掘應用智能特徵匹配"""
+ return self.score_calculator.apply_intelligent_trait_matching(recommendations, user_input)
+
+ def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]:
+ """將標準化品種信息轉換為字典格式"""
+ return self.score_calculator.get_breed_info_from_standardized(standardized_info)
+
+ def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]:
+ """當增強系統失敗時獲取備用推薦"""
+ return self.score_calculator.get_fallback_recommendations(top_k)
def get_enhanced_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
"""
- Enhanced multi-dimensional semantic breed recommendation
+ 增強的多維度語義品種推薦
Args:
- user_input: User's natural language description
- top_k: Number of recommendations to return
+ user_input: 用戶的自然語言描述
+ top_k: 返回的推薦數量
Returns:
- List of recommended breeds with enhanced scoring
+ 增強評分的推薦品種列表
"""
try:
- # Stage 1: Query Understanding
- dimensions = self.query_engine.analyze_query(user_input)
- print(f"Query dimensions detected: {len(dimensions.spatial_constraints + dimensions.activity_level + dimensions.noise_preferences + dimensions.size_preferences + dimensions.family_context + dimensions.maintenance_level + dimensions.special_requirements)} total dimensions")
-
- # Stage 2: Apply Constraints
- filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k))
- print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates")
+ # 階段 1: 查詢理解
+ if self.query_engine:
+ dimensions = self.query_engine.analyze_query(user_input)
+ print(f"Query dimensions detected: {len(dimensions.spatial_constraints + dimensions.activity_level + dimensions.noise_preferences + dimensions.size_preferences + dimensions.family_context + dimensions.maintenance_level + dimensions.special_requirements)} total dimensions")
+ else:
+ print("Query engine not available, using basic analysis")
+ return self.get_semantic_recommendations(user_input, top_k)
- if not filter_result.passed_breeds:
- error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements."
- print(f"ERROR: {error_msg}")
- raise ValueError(error_msg)
+ # 階段 2: 應用約束
+ if self.constraint_manager:
+ filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k))
+ print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates")
- # Stage 3: Multi-head Scoring
+ if not filter_result.passed_breeds:
+ error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements."
+ print(f"ERROR: {error_msg}")
+ raise ValueError(error_msg)
+ else:
+ print("Constraint manager not available, using all breeds")
+ filter_result = type('FilterResult', (), {
+ 'passed_breeds': self.breed_list,
+ 'applied_constraints': [],
+ 'relaxed_constraints': [],
+ 'warnings': []
+ })()
+
+ # 階段 3: 多頭評分
if self.multi_head_scorer:
breed_scores = self.multi_head_scorer.score_breeds(filter_result.passed_breeds, dimensions)
print(f"Multi-head scoring completed for {len(breed_scores)} breeds")
@@ -741,24 +153,34 @@ class SemanticBreedRecommender:
print("Multi-head scorer not available, using fallback scoring")
return self.get_semantic_recommendations(user_input, top_k)
- # Stage 4: Score Calibration
- breed_score_tuples = [(score.breed_name, score.final_score) for score in breed_scores]
- calibration_result = self.score_calibrator.calibrate_scores(breed_score_tuples)
- print(f"Score calibration: method={calibration_result.calibration_method}")
+ # 階段 4: 分數校準
+ if self.score_calibrator:
+ breed_score_tuples = [(score.breed_name, score.final_score) for score in breed_scores]
+ calibration_result = self.score_calibrator.calibrate_scores(breed_score_tuples)
+ print(f"Score calibration: method={calibration_result.calibration_method}")
+ else:
+ print("Score calibrator not available, using raw scores")
+ calibration_result = type('CalibrationResult', (), {
+ 'score_mapping': {score.breed_name: score.final_score for score in breed_scores},
+ 'calibration_method': 'none'
+ })()
- # Stage 5: Generate Final Recommendations
+ # 階段 5: 生成最終推薦
final_recommendations = []
for i, breed_score in enumerate(breed_scores[:top_k]):
breed_name = breed_score.breed_name
- # Get calibrated score
+ # 獲取校準後的分數
calibrated_score = calibration_result.score_mapping.get(breed_name, breed_score.final_score)
- # Get standardized breed info
- standardized_info = get_standardized_breed_data(breed_name.replace(' ', '_'))
- if standardized_info:
- breed_info = self._get_breed_info_from_standardized(standardized_info)
+ # 獲取標準化品種信息
+ if self.config_manager:
+ standardized_info = get_standardized_breed_data(breed_name.replace(' ', '_'))
+ if standardized_info:
+ breed_info = self._get_breed_info_from_standardized(standardized_info)
+ else:
+ breed_info = get_dog_description(breed_name.replace(' ', '_')) or {}
else:
breed_info = get_dog_description(breed_name.replace(' ', '_')) or {}
@@ -789,10 +211,10 @@ class SemanticBreedRecommender:
final_recommendations.append(recommendation)
- # Apply size distribution correction before returning
+ # 應用尺寸分佈修正
corrected_recommendations = self._apply_size_distribution_correction(final_recommendations)
- # Stage 6: Apply Intelligent Trait Matching Enhancement
+ # 階段 6: 應用智能特徵匹配增強
intelligence_enhanced_recommendations = self._apply_intelligent_trait_matching(corrected_recommendations, user_input)
print(f"Generated {len(intelligence_enhanced_recommendations)} enhanced semantic recommendations with intelligent trait matching")
@@ -801,232 +223,72 @@ class SemanticBreedRecommender:
except Exception as e:
print(f"Error in enhanced semantic recommendations: {str(e)}")
print(traceback.format_exc())
- # Fallback to original method
+ # 回退到原始方法
return self.get_semantic_recommendations(user_input, top_k)
- def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]:
- """Apply intelligent trait matching based on enhanced keyword extraction and database mining"""
- try:
- # Extract enhanced keywords from user input
- extracted_keywords = self._extract_lifestyle_keywords(user_input)
-
- # Apply intelligent trait matching to each recommendation
- enhanced_recommendations = []
-
- for rec in recommendations:
- breed_name = rec['breed'].replace(' ', '_')
-
- # Get breed database information
- breed_info = get_dog_description(breed_name) or {}
-
- # Calculate intelligent trait bonuses
- intelligence_bonus = 0.0
- trait_match_details = {}
-
- # 1. Intelligence Matching
- if extracted_keywords.get('intelligence_preference'):
- intelligence_pref = extracted_keywords['intelligence_preference'][0]
- breed_desc = breed_info.get('Description', '').lower()
-
- if intelligence_pref == 'high':
- if any(word in breed_desc for word in ['intelligent', 'smart', 'clever', 'quick to learn', 'trainable']):
- intelligence_bonus += 0.05
- trait_match_details['intelligence_match'] = 'High intelligence match detected'
- elif any(word in breed_desc for word in ['stubborn', 'independent', 'difficult']):
- intelligence_bonus -= 0.02
- trait_match_details['intelligence_warning'] = 'May be challenging to train'
-
- elif intelligence_pref == 'independent':
- if any(word in breed_desc for word in ['independent', 'stubborn', 'strong-willed']):
- intelligence_bonus += 0.03
- trait_match_details['independence_match'] = 'Independent nature match'
-
- # 2. Grooming Preference Matching
- if extracted_keywords.get('grooming_preference'):
- grooming_pref = extracted_keywords['grooming_preference'][0]
- breed_grooming = breed_info.get('Grooming Needs', '').lower()
-
- if grooming_pref == 'low' and 'low' in breed_grooming:
- intelligence_bonus += 0.03
- trait_match_details['grooming_match'] = 'Low maintenance grooming match'
- elif grooming_pref == 'high' and 'high' in breed_grooming:
- intelligence_bonus += 0.03
- trait_match_details['grooming_match'] = 'High maintenance grooming match'
- elif grooming_pref == 'low' and 'high' in breed_grooming:
- intelligence_bonus -= 0.04
- trait_match_details['grooming_mismatch'] = 'High grooming needs may not suit preferences'
-
- # 3. Temperament Preference Matching
- if extracted_keywords.get('temperament_preference'):
- temp_prefs = extracted_keywords['temperament_preference']
- breed_temperament = breed_info.get('Temperament', '').lower()
- breed_desc = breed_info.get('Description', '').lower()
-
- temp_text = (breed_temperament + ' ' + breed_desc).lower()
-
- for temp_pref in temp_prefs:
- if temp_pref == 'gentle' and any(word in temp_text for word in ['gentle', 'calm', 'peaceful', 'mild']):
- intelligence_bonus += 0.04
- trait_match_details['temperament_match'] = f'Gentle temperament match: {temp_pref}'
- elif temp_pref == 'playful' and any(word in temp_text for word in ['playful', 'energetic', 'lively', 'fun']):
- intelligence_bonus += 0.04
- trait_match_details['temperament_match'] = f'Playful temperament match: {temp_pref}'
- elif temp_pref == 'protective' and any(word in temp_text for word in ['protective', 'guard', 'alert', 'watchful']):
- intelligence_bonus += 0.04
- trait_match_details['temperament_match'] = f'Protective temperament match: {temp_pref}'
- elif temp_pref == 'friendly' and any(word in temp_text for word in ['friendly', 'social', 'outgoing', 'people']):
- intelligence_bonus += 0.04
- trait_match_details['temperament_match'] = f'Friendly temperament match: {temp_pref}'
-
- # 4. Experience Level Matching
- if extracted_keywords.get('experience_level'):
- exp_level = extracted_keywords['experience_level'][0]
- breed_desc = breed_info.get('Description', '').lower()
-
- if exp_level == 'beginner':
- # Favor easy-to-handle breeds for beginners
- if any(word in breed_desc for word in ['easy', 'gentle', 'good for beginners', 'family', 'calm']):
- intelligence_bonus += 0.06
- trait_match_details['beginner_friendly'] = 'Good choice for first-time owners'
- elif any(word in breed_desc for word in ['challenging', 'dominant', 'requires experience', 'strong-willed']):
- intelligence_bonus -= 0.08
- trait_match_details['experience_warning'] = 'May be challenging for first-time owners'
-
- elif exp_level == 'advanced':
- # Advanced users can handle more challenging breeds
- if any(word in breed_desc for word in ['working', 'requires experience', 'intelligent', 'strong']):
- intelligence_bonus += 0.03
- trait_match_details['advanced_suitable'] = 'Good match for experienced owners'
-
- # 5. Lifespan Preference Matching
- if extracted_keywords.get('lifespan_preference'):
- lifespan_pref = extracted_keywords['lifespan_preference'][0]
- breed_lifespan = breed_info.get('Lifespan', '10-12 years')
-
- try:
- import re
- years = re.findall(r'\d+', breed_lifespan)
- if years:
- avg_years = sum(int(y) for y in years) / len(years)
- if lifespan_pref == 'long' and avg_years >= 13:
- intelligence_bonus += 0.02
- trait_match_details['longevity_match'] = f'Long lifespan match: {breed_lifespan}'
- elif lifespan_pref == 'healthy' and avg_years >= 12:
- intelligence_bonus += 0.02
- trait_match_details['health_match'] = f'Healthy lifespan: {breed_lifespan}'
- except:
- pass
-
- # Apply the intelligence bonus to the overall score
- original_score = rec['overall_score']
- enhanced_score = min(1.0, original_score + intelligence_bonus)
-
- # Create enhanced recommendation with trait matching details
- enhanced_rec = rec.copy()
- enhanced_rec['overall_score'] = enhanced_score
- enhanced_rec['intelligence_bonus'] = intelligence_bonus
- enhanced_rec['trait_match_details'] = trait_match_details
-
- # Add detailed explanation if significant enhancement occurred
- if abs(intelligence_bonus) > 0.02:
- enhancement_explanation = []
- for detail_key, detail_value in trait_match_details.items():
- enhancement_explanation.append(detail_value)
-
- if enhancement_explanation:
- current_explanation = enhanced_rec.get('explanation', '')
- enhanced_explanation = current_explanation + f" Enhanced matching: {'; '.join(enhancement_explanation)}"
- enhanced_rec['explanation'] = enhanced_explanation
-
- enhanced_recommendations.append(enhanced_rec)
-
- # Re-sort by enhanced overall score
- enhanced_recommendations.sort(key=lambda x: x['overall_score'], reverse=True)
-
- # Update ranks
- for i, rec in enumerate(enhanced_recommendations):
- rec['rank'] = i + 1
-
- print(f"Applied intelligent trait matching with average bonus: {sum(r['intelligence_bonus'] for r in enhanced_recommendations) / len(enhanced_recommendations):.3f}")
-
- return enhanced_recommendations
-
- except Exception as e:
- print(f"Error in intelligent trait matching: {str(e)}")
- # Return original recommendations if trait matching fails
- return recommendations
-
def get_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
"""
- Get breed recommendations based on natural language description
+ 基於自然語言描述獲取品種推薦
Args:
- user_input: User's natural language description
- top_k: Number of recommendations to return
+ user_input: 用戶的自然語言描述
+ top_k: 返回的推薦數量
Returns:
- List of recommended breeds
+ 推薦品種列表
"""
try:
print(f"Processing user input: {user_input}")
- # 嘗試載入SBERT模型(如果尚未載入)
- if self.sbert_model is None:
- self._initialize_model()
-
- # Check if model is available - if not, raise error
+ # 檢查模型是否可用 - 如果不可用,則報錯
if self.sbert_model is None:
error_msg = "SBERT model not available. This could be due to:\n• Model download failed\n• Insufficient memory\n• Network connectivity issues\n\nPlease check your environment and try again."
print(f"ERROR: {error_msg}")
raise RuntimeError(error_msg)
- # 確保breed vectors已建構
- if not self.breed_vectors:
- self._build_breed_vectors()
-
- # Generate user input embedding
- user_embedding = self.sbert_model.encode(user_input, convert_to_tensor=False)
+ # 生成用戶輸入嵌入
+ user_embedding = self.vector_manager.encode_text(user_input)
- # Parse comparative preferences
+ # 解析比較性偏好
comparative_prefs = self._parse_comparative_preferences(user_input)
- # Extract lifestyle keywords
+ # 提取生活方式關鍵字
lifestyle_keywords = self._extract_lifestyle_keywords(user_input)
- # Calculate similarity with all breeds and apply constraints
+ # 計算與所有品種的相似度並應用約束
similarities = []
for breed, breed_vector in self.breed_vectors.items():
- # Apply hard constraints first
+ # 首先應用硬約束
constraint_penalty = self._apply_hard_constraints(breed, user_input, breed_vector.characteristics)
- # Skip breeds that violate critical constraints
- if constraint_penalty <= -1.0: # Complete disqualification
+ # 跳過違反關鍵約束的品種
+ if constraint_penalty <= -1.0: # 完全取消資格
continue
- # Basic semantic similarity
+ # 基本語義相似度
semantic_score = cosine_similarity(
[user_embedding],
[breed_vector.embedding]
)[0][0]
- # Comparative preference weighting
+ # 比較性偏好加權
comparative_bonus = comparative_prefs.get(breed, 0.0)
- # Lifestyle matching bonus
+ # 生活方式匹配獎勵
lifestyle_bonus = self._calculate_lifestyle_bonus(
breed_vector.characteristics,
lifestyle_keywords
)
- # Apply constraint penalties
+ # 應用約束懲罰
lifestyle_bonus += constraint_penalty
- # Enhanced combined score with better distribution
- # Apply exponential scaling to create more natural score spread
- base_semantic = semantic_score ** 0.8 # Slightly compress high scores
- enhanced_lifestyle = lifestyle_bonus * 2.0 # Amplify lifestyle matching
- enhanced_comparative = comparative_bonus * 1.5 # Amplify breed preferences
+ # 更好分佈的增強組合分數
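+ # Apply power scaling (score ** 0.8) for a more natural score spread; NOTE(review): not real-valued for a negative similarity - confirm inputs are >= 0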
+ base_semantic = semantic_score ** 0.8 # 輕微壓縮高分
+ enhanced_lifestyle = lifestyle_bonus * 2.0 # 放大生活方式匹配
+ enhanced_comparative = comparative_bonus * 1.5 # 放大品種偏好
final_score = (
base_semantic * 0.55 +
@@ -1034,11 +296,11 @@ class SemanticBreedRecommender:
enhanced_lifestyle * 0.15
)
- # Add small random variation to break ties naturally
- random.seed(hash(breed)) # Consistent for same breed
+ # 添加小的隨機變化以自然地打破平局
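+ random.seed(hash(breed)) # Same breed -> same seed within one process only; str hashes are salted per run unless PYTHONHASHSEED is fixed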
final_score += random.uniform(-0.03, 0.03)
- # Ensure final score doesn't exceed 1.0
+ # 確保最終分數不超過 1.0
final_score = min(1.0, final_score)
similarities.append({
@@ -1049,10 +311,10 @@ class SemanticBreedRecommender:
'lifestyle_bonus': lifestyle_bonus
})
- # Calculate standardized display scores with balanced distribution
+ # 計算平衡分佈的標準化顯示分數
breed_display_scores = []
- # First, collect all semantic scores for normalization
+ # 首先,收集所有語義分數以進行標準化
all_semantic_scores = [breed_data['semantic_score'] for breed_data in similarities]
semantic_mean = np.mean(all_semantic_scores)
semantic_std = np.std(all_semantic_scores) if len(all_semantic_scores) > 1 else 1.0
@@ -1061,24 +323,24 @@ class SemanticBreedRecommender:
breed = breed_data['breed']
base_semantic = breed_data['semantic_score']
- # Normalize semantic score to prevent extreme outliers
+ # 標準化語義分數以防止極端異常值
if semantic_std > 0:
normalized_semantic = (base_semantic - semantic_mean) / semantic_std
- normalized_semantic = max(-2.0, min(2.0, normalized_semantic)) # Cap at 2 standard deviations
- scaled_semantic = 0.5 + (normalized_semantic * 0.1) # Map to 0.3-0.7 range
+ normalized_semantic = max(-2.0, min(2.0, normalized_semantic)) # 限制在 2 個標準差
+ scaled_semantic = 0.5 + (normalized_semantic * 0.1) # 映射到 0.3-0.7 範圍
else:
scaled_semantic = 0.5
- # Get breed characteristics
+ # 獲取品種特徵
breed_info = get_dog_description(breed) if breed != 'Unknown' else {}
breed_size = breed_info.get('Size', '').lower() if breed_info else ''
exercise_needs = breed_info.get('Exercise Needs', '').lower() if breed_info else ''
- # Calculate feature matching score (more important than pure semantic similarity)
+ # 計算特徵匹配分數(比純語義相似度更重要)
feature_score = 0.0
user_text = user_input.lower()
- # Size and space requirements (high weight)
+ # 尺寸和空間需求(高權重)
if any(term in user_text for term in ['apartment', 'small', 'limited space']):
if 'small' in breed_size:
feature_score += 0.25
@@ -1087,7 +349,7 @@ class SemanticBreedRecommender:
elif 'large' in breed_size or 'giant' in breed_size:
feature_score -= 0.30
- # Exercise requirements (high weight)
+ # 運動需求(高權重)
if any(term in user_text for term in ['low exercise', 'minimal exercise', "doesn't need", 'not much']):
if 'low' in exercise_needs or 'minimal' in exercise_needs:
feature_score += 0.20
@@ -1099,7 +361,7 @@ class SemanticBreedRecommender:
elif 'low' in exercise_needs:
feature_score -= 0.15
- # Family compatibility
+ # 家庭相容性
if any(term in user_text for term in ['children', 'kids', 'family']):
good_with_children = breed_info.get('Good with Children', '') if breed_info else ''
if good_with_children == 'Yes':
@@ -1107,42 +369,42 @@ class SemanticBreedRecommender:
elif good_with_children == 'No':
feature_score -= 0.20
- # Combine scores with balanced weights
+ # 平衡權重組合分數
final_score = (
- scaled_semantic * 0.35 + # Reduced semantic weight
- feature_score * 0.45 + # Increased feature matching weight
+ scaled_semantic * 0.35 + # 降低語義權重
+ feature_score * 0.45 + # 增加特徵匹配權重
breed_data['lifestyle_bonus'] * 0.15 +
breed_data['comparative_bonus'] * 0.05
)
- # Calculate base compatibility score
+ # 計算基本相容性分數
base_compatibility = final_score
- # Apply dynamic scoring with natural distribution
- if base_compatibility >= 0.9: # Exceptional matches
+ # 應用自然分佈的動態評分
+ if base_compatibility >= 0.9: # Exceptional matches
score_range = (0.92, 0.98)
position = (base_compatibility - 0.9) / 0.1
- elif base_compatibility >= 0.75: # Excellent matches
+ elif base_compatibility >= 0.75: # 優秀匹配
score_range = (0.85, 0.91)
position = (base_compatibility - 0.75) / 0.15
- elif base_compatibility >= 0.6: # Good matches
+ elif base_compatibility >= 0.6: # 良好匹配
score_range = (0.75, 0.84)
position = (base_compatibility - 0.6) / 0.15
- elif base_compatibility >= 0.45: # Fair matches
+ elif base_compatibility >= 0.45: # Fair (middling) matches
score_range = (0.65, 0.74)
position = (base_compatibility - 0.45) / 0.15
- elif base_compatibility >= 0.3: # Poor matches
+ elif base_compatibility >= 0.3: # 較差匹配
score_range = (0.55, 0.64)
position = (base_compatibility - 0.3) / 0.15
- else: # Very poor matches
+ else: # 非常差的匹配
score_range = (0.45, 0.54)
position = max(0, base_compatibility / 0.3)
- # Calculate final score with natural variation
+ # 計算帶自然變化的最終分數
score_span = score_range[1] - score_range[0]
base_score = score_range[0] + (position * score_span)
- # Add controlled random variation for natural ranking
+ # 添加控制的隨機變化以進行自然排名
random.seed(hash(breed + user_input[:15]))
variation = random.uniform(-0.015, 0.015)
display_score = round(max(0.45, min(0.98, base_score + variation)), 3)
@@ -1155,24 +417,24 @@ class SemanticBreedRecommender:
'lifestyle_bonus': breed_data['lifestyle_bonus']
})
- # Sort by display score to ensure ranking consistency
+ # 按顯示分數排序以確保排名一致性
breed_display_scores.sort(key=lambda x: x['display_score'], reverse=True)
top_breeds = breed_display_scores[:top_k]
- # Convert to standard recommendation format
+ # 轉換為標準推薦格式
recommendations = []
for i, breed_data in enumerate(top_breeds):
breed = breed_data['breed']
display_score = breed_data['display_score']
- # Get detailed information
+ # 獲取詳細信息
breed_info = get_dog_description(breed)
recommendation = {
'breed': breed.replace('_', ' '),
'rank': i + 1,
- 'overall_score': display_score, # Use display score for consistency
- 'final_score': display_score, # Ensure final_score matches overall_score
+ 'overall_score': display_score, # 使用顯示分數以保持一致性
+ 'final_score': display_score, # 確保 final_score 與 overall_score 匹配
'semantic_score': breed_data['semantic_score'],
'comparative_bonus': breed_data['comparative_bonus'],
'lifestyle_bonus': breed_data['lifestyle_bonus'],
@@ -1196,158 +458,6 @@ class SemanticBreedRecommender:
print(traceback.format_exc())
return []
- def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any],
- lifestyle_keywords: Dict[str, List[str]]) -> float:
- """Enhanced lifestyle matching bonus calculation"""
- bonus = 0.0
- penalties = 0.0
-
- # Enhanced size matching
- breed_size = breed_characteristics.get('size', '').lower()
- size_prefs = lifestyle_keywords.get('size_preference', [])
- for pref in size_prefs:
- if pref in breed_size:
- bonus += 0.25 # Strong reward for size match
- elif (pref == 'small' and 'large' in breed_size) or \
- (pref == 'large' and 'small' in breed_size):
- penalties += 0.15 # Penalty for size mismatch
-
- # Enhanced activity level matching
- breed_exercise = breed_characteristics.get('exercise_needs', '').lower()
- activity_prefs = lifestyle_keywords.get('activity_level', [])
-
- if 'high' in activity_prefs:
- if 'high' in breed_exercise or 'very high' in breed_exercise:
- bonus += 0.2
- elif 'low' in breed_exercise:
- penalties += 0.2
- elif 'low' in activity_prefs:
- if 'low' in breed_exercise:
- bonus += 0.2
- elif 'high' in breed_exercise or 'very high' in breed_exercise:
- penalties += 0.25
- elif 'moderate' in activity_prefs:
- if 'moderate' in breed_exercise:
- bonus += 0.15
-
- # Enhanced family situation matching
- good_with_children = breed_characteristics.get('good_with_children', 'Yes')
- family_prefs = lifestyle_keywords.get('family_situation', [])
-
- if 'children' in family_prefs:
- if good_with_children == 'Yes':
- bonus += 0.15
- else:
- penalties += 0.3 # Strong penalty for non-child-friendly breeds
-
- # Enhanced living space matching
- living_prefs = lifestyle_keywords.get('living_space', [])
- if 'apartment' in living_prefs:
- if 'small' in breed_size:
- bonus += 0.2
- elif 'medium' in breed_size and 'low' in breed_exercise:
- bonus += 0.1
- elif 'large' in breed_size or 'giant' in breed_size:
- penalties += 0.2 # Penalty for large dogs in apartments
-
- # Noise preference matching
- noise_prefs = lifestyle_keywords.get('noise_preference', [])
- temperament = breed_characteristics.get('temperament', '').lower()
-
- if 'low' in noise_prefs:
- # Reward quiet breeds
- if any(term in temperament for term in ['gentle', 'calm', 'quiet']):
- bonus += 0.1
-
- # Care level matching
- grooming_needs = breed_characteristics.get('grooming_needs', '').lower()
- care_prefs = lifestyle_keywords.get('care_level', [])
-
- if 'low' in care_prefs and 'low' in grooming_needs:
- bonus += 0.1
- elif 'high' in care_prefs and 'high' in grooming_needs:
- bonus += 0.1
- elif 'low' in care_prefs and 'high' in grooming_needs:
- penalties += 0.15
-
- # Special needs matching
- special_needs = lifestyle_keywords.get('special_needs', [])
-
- if 'guard' in special_needs:
- if any(term in temperament for term in ['protective', 'alert', 'watchful']):
- bonus += 0.1
- elif 'companion' in special_needs:
- if any(term in temperament for term in ['affectionate', 'gentle', 'loyal']):
- bonus += 0.1
-
- # Calculate final bonus with penalties
- final_bonus = bonus - penalties
- return max(-0.3, min(0.5, final_bonus)) # Allow negative bonus but limit range
-
- def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]:
- """Convert standardized breed info to dictionary format"""
- try:
- size_map = {1: 'Tiny', 2: 'Small', 3: 'Medium', 4: 'Large', 5: 'Giant'}
- exercise_map = {1: 'Low', 2: 'Moderate', 3: 'High', 4: 'Very High'}
- care_map = {1: 'Low', 2: 'Moderate', 3: 'High'}
-
- return {
- 'Size': size_map.get(standardized_info.size_category, 'Medium'),
- 'Exercise Needs': exercise_map.get(standardized_info.exercise_level, 'Moderate'),
- 'Grooming Needs': care_map.get(standardized_info.care_complexity, 'Moderate'),
- 'Good with Children': 'Yes' if standardized_info.child_compatibility >= 0.8 else
- 'No' if standardized_info.child_compatibility <= 0.2 else 'Unknown',
- 'Temperament': 'Varies by individual',
- 'Lifespan': '10-12 years',
- 'Description': f'A {size_map.get(standardized_info.size_category, "medium")} sized breed'
- }
- except Exception as e:
- print(f"Error converting standardized info: {str(e)}")
- return {}
-
- def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]:
- """Get fallback recommendations when enhanced system fails"""
- try:
- safe_breeds = [
- ('Labrador Retriever', 0.85),
- ('Golden Retriever', 0.82),
- ('Cavalier King Charles Spaniel', 0.80),
- ('French Bulldog', 0.78),
- ('Boston Terrier', 0.76),
- ('Bichon Frise', 0.74),
- ('Pug', 0.72),
- ('Cocker Spaniel', 0.70)
- ]
-
- recommendations = []
- for i, (breed, score) in enumerate(safe_breeds[:top_k]):
- breed_info = get_dog_description(breed.replace(' ', '_')) or {}
-
- recommendation = {
- 'breed': breed,
- 'rank': i + 1,
- 'overall_score': score,
- 'final_score': score,
- 'semantic_score': score * 0.8,
- 'comparative_bonus': 0.0,
- 'lifestyle_bonus': 0.0,
- 'size': breed_info.get('Size', 'Unknown'),
- 'temperament': breed_info.get('Temperament', ''),
- 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
- 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
- 'good_with_children': breed_info.get('Good with Children', 'Yes'),
- 'lifespan': breed_info.get('Lifespan', '10-12 years'),
- 'description': breed_info.get('Description', ''),
- 'search_type': 'fallback'
- }
- recommendations.append(recommendation)
-
- return recommendations
-
- except Exception as e:
- print(f"Error generating fallback recommendations: {str(e)}")
- return []
-
def get_enhanced_recommendations_with_unified_scoring(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
"""簡化的增強推薦方法"""
try:
@@ -1364,137 +474,34 @@ class SemanticBreedRecommender:
def _analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
"""增強用戶描述分析"""
- text = user_description.lower()
- analysis = {
- 'mentioned_breeds': [],
- 'lifestyle_keywords': {},
- 'preference_strength': {},
- 'constraint_requirements': [],
- 'user_context': {}
- }
-
- # 提取提及的品種
- for breed in self.breed_list:
- breed_display = breed.replace('_', ' ').lower()
- if breed_display in text or any(word in text for word in breed_display.split()):
- analysis['mentioned_breeds'].append(breed)
- # 簡單偏好強度分析
- if any(word in text for word in ['love', 'prefer', 'like', '喜歡', '最愛']):
- analysis['preference_strength'][breed] = 0.8
- else:
- analysis['preference_strength'][breed] = 0.5
-
- # 提取約束要求
- if any(word in text for word in ['quiet', 'silent', 'no barking', '安靜']):
- analysis['constraint_requirements'].append('low_noise')
- if any(word in text for word in ['apartment', 'small space', '公寓']):
- analysis['constraint_requirements'].append('apartment_suitable')
- if any(word in text for word in ['children', 'kids', 'family', '小孩']):
- analysis['constraint_requirements'].append('child_friendly')
-
- # 提取用戶背景
- analysis['user_context'] = {
- 'has_children': any(word in text for word in ['children', 'kids', '小孩']),
- 'living_space': 'apartment' if any(word in text for word in ['apartment', '公寓']) else 'house',
- 'activity_level': 'high' if any(word in text for word in ['active', 'energetic', '活躍']) else 'moderate',
- 'noise_sensitive': any(word in text for word in ['quiet', 'silent', '安靜']),
- 'experience_level': 'beginner' if any(word in text for word in ['first time', 'beginner', '新手']) else 'intermediate'
- }
-
- return analysis
+ return self.query_analyzer.analyze_user_description_enhanced(user_description)
def _create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> UserPreferences:
"""從分析結果創建用戶偏好物件"""
- context = analysis['user_context']
-
- # 推斷居住空間類型
- living_space = 'apartment' if context.get('living_space') == 'apartment' else 'house_small'
-
- # 推斷院子權限
- yard_access = 'no_yard' if living_space == 'apartment' else 'shared_yard'
-
- # 推斷運動時間
- activity_level = context.get('activity_level', 'moderate')
- exercise_time_map = {'high': 120, 'moderate': 60, 'low': 30}
- exercise_time = exercise_time_map.get(activity_level, 60)
-
- # 推斷運動類型
- exercise_type_map = {'high': 'active_training', 'moderate': 'moderate_activity', 'low': 'light_walks'}
- exercise_type = exercise_type_map.get(activity_level, 'moderate_activity')
-
- # 推斷噪音容忍度
- noise_tolerance = 'low' if context.get('noise_sensitive', False) else 'medium'
-
- return UserPreferences(
- living_space=living_space,
- yard_access=yard_access,
- exercise_time=exercise_time,
- exercise_type=exercise_type,
- grooming_commitment='medium',
- experience_level=context.get('experience_level', 'intermediate'),
- time_availability='moderate',
- has_children=context.get('has_children', False),
- children_age='school_age' if context.get('has_children', False) else None,
- noise_tolerance=noise_tolerance,
- space_for_play=(living_space != 'apartment'),
- other_pets=False,
- climate='moderate',
- health_sensitivity='medium',
- barking_acceptance=noise_tolerance,
- size_preference='no_preference'
- )
+ return self.query_analyzer.create_user_preferences_from_analysis_enhanced(analysis)
def _get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
"""獲取候選品種列表"""
- candidate_breeds = set()
-
- # 如果提及特定品種,優先包含
- if analysis['mentioned_breeds']:
- candidate_breeds.update(analysis['mentioned_breeds'])
-
- # 根據約束要求過濾品種
- if 'apartment_suitable' in analysis['constraint_requirements']:
- apartment_suitable = [
- 'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
- 'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier', 'Shih_Tzu'
- ]
- candidate_breeds.update(breed for breed in apartment_suitable if breed in self.breed_list)
-
- if 'child_friendly' in analysis['constraint_requirements']:
- child_friendly = [
- 'Labrador_Retriever', 'Golden_Retriever', 'Beagle', 'Cavalier_King_Charles_Spaniel',
- 'Bichon_Frise', 'Poodle', 'Cocker_Spaniel'
- ]
- candidate_breeds.update(breed for breed in child_friendly if breed in self.breed_list)
-
- # 如果候選品種不足,添加更多通用品種
- if len(candidate_breeds) < 20:
- general_breeds = [
- 'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', 'French_Bulldog',
- 'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Boston_Terrier',
- 'Border_Collie', 'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer',
- 'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua'
- ]
- candidate_breeds.update(breed for breed in general_breeds if breed in self.breed_list)
-
- return list(candidate_breeds)[:30] # 限制候選數量以提高效率
+ return self.query_analyzer.get_candidate_breeds_enhanced(analysis)
def _apply_constraint_filtering_enhanced(self, breed: str, analysis: Dict[str, Any]) -> float:
"""應用約束過濾,返回調整分數"""
- penalty = 0.0
+ # 這個方法需要從 score_calculator 調用適當的方法
+ # 但原始實現中沒有這個具體方法,所以我們提供基本實現
+ constraint_penalty = 0.0
breed_info = get_dog_description(breed)
if not breed_info:
- return penalty
+ return constraint_penalty
# 低噪音要求
if 'low_noise' in analysis['constraint_requirements']:
noise_info = breed_noise_info.get(breed, {})
noise_level = noise_info.get('noise_level', 'moderate').lower()
if 'high' in noise_level:
- penalty -= 0.3 # 嚴重扣分
+ constraint_penalty -= 0.3 # 嚴重扣分
elif 'low' in noise_level:
- penalty += 0.1 # 輕微加分
+ constraint_penalty += 0.1 # 輕微加分
# 公寓適合性
if 'apartment_suitable' in analysis['constraint_requirements']:
@@ -1502,76 +509,58 @@ class SemanticBreedRecommender:
exercise_needs = breed_info.get('Exercise Needs', '').lower()
if size in ['large', 'giant']:
- penalty -= 0.2
+ constraint_penalty -= 0.2
elif size in ['small', 'tiny']:
- penalty += 0.1
+ constraint_penalty += 0.1
if 'high' in exercise_needs:
- penalty -= 0.15
+ constraint_penalty -= 0.15
# 兒童友善性
if 'child_friendly' in analysis['constraint_requirements']:
good_with_children = breed_info.get('Good with Children', 'Unknown')
if good_with_children == 'Yes':
- penalty += 0.15
+ constraint_penalty += 0.15
elif good_with_children == 'No':
- penalty -= 0.4 # 嚴重扣分
+ constraint_penalty -= 0.4 # 嚴重扣分
- return penalty
+ return constraint_penalty
def _get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]:
"""獲取品種特徵"""
- breed_info = get_dog_description(breed)
- if not breed_info:
- return {}
-
- characteristics = {
- 'size': breed_info.get('Size', 'Unknown'),
- 'temperament': breed_info.get('Temperament', ''),
- 'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
- 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
- 'good_with_children': breed_info.get('Good with Children', 'Unknown'),
- 'lifespan': breed_info.get('Lifespan', '10-12 years'),
- 'description': breed_info.get('Description', '')
- }
-
- # 添加噪音資訊
- noise_info = breed_noise_info.get(breed, {})
- characteristics['noise_level'] = noise_info.get('noise_level', 'moderate')
-
- return characteristics
+ return self.score_calculator.get_breed_characteristics_enhanced(breed)
def get_hybrid_recommendations(self, user_description: str,
user_preferences: Optional[Any] = None,
top_k: int = 15) -> List[Dict[str, Any]]:
"""
- Hybrid recommendations: Combine semantic matching with traditional scoring
+ 混合推薦:結合語義匹配與傳統評分
Args:
- user_description: User's natural language description
- user_preferences: Optional structured preference settings
- top_k: Number of recommendations to return
+ user_description: 用戶的自然語言描述
+ user_preferences: 可選的結構化偏好設置
+ top_k: 返回的推薦數量
Returns:
- Hybrid recommendation results
+ 混合推薦結果
"""
try:
- # Get semantic recommendations
+ # 獲取語義推薦
semantic_recommendations = self.get_semantic_recommendations(user_description, top_k * 2)
if not user_preferences:
return semantic_recommendations[:top_k]
- # Combine with traditional scoring
+ # 與傳統評分結合
hybrid_results = []
for semantic_rec in semantic_recommendations:
breed_name = semantic_rec['breed'].replace(' ', '_')
- # Calculate traditional compatibility score
+ # 計算傳統相容性分數
traditional_score = calculate_compatibility_score(user_preferences, breed_name)
- # Hybrid score (semantic 40% + traditional 60%)
+ # 混合分數(語義 40% + 傳統 60%)
hybrid_score = (
semantic_rec['overall_score'] * 0.4 +
traditional_score * 0.6
@@ -1581,10 +570,10 @@ class SemanticBreedRecommender:
semantic_rec['traditional_score'] = traditional_score
hybrid_results.append(semantic_rec)
- # Re-sort by hybrid score
+ # 按混合分數重新排序
hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)
- # Update rankings
+ # 更新排名
for i, result in enumerate(hybrid_results[:top_k]):
result['rank'] = i + 1
result['overall_score'] = result['hybrid_score']
@@ -1596,17 +585,14 @@ class SemanticBreedRecommender:
print(traceback.format_exc())
return self.get_semantic_recommendations(user_description, top_k)
+
def get_breed_recommendations_by_description(user_description: str,
user_preferences: Optional[Any] = None,
top_k: int = 15) -> List[Dict[str, Any]]:
- """Main interface function for getting breed recommendations by description"""
+ """基於描述獲取品種推薦的主要介面函數"""
try:
print("Initializing Enhanced SemanticBreedRecommender...")
recommender = SemanticBreedRecommender()
-
- # 嘗試載入SBERT模型(如果尚未載入)
- if not recommender.sbert_model:
- recommender._initialize_model()
# 優先使用整合統一評分系統的增強推薦
print("Using enhanced recommendation system with unified scoring")
@@ -1651,22 +637,14 @@ def get_enhanced_recommendations_with_unified_scoring(user_description: str, top
# 創建基本推薦器實例
recommender = SemanticBreedRecommender()
- # 嘗試載入SBERT模型(如果尚未載入)
- if not recommender.sbert_model:
- recommender._initialize_model()
-
- if not recommender.sbert_model:
+ if not recommender.vector_manager.is_model_available():
print("SBERT model not available, using basic text matching...")
# 使用基本文字匹配邏輯
- return _get_basic_text_matching_recommendations(user_description, top_k)
-
- # 確保breed vectors已建構
- if not recommender.breed_vectors:
- recommender._build_breed_vectors()
+ return _get_basic_text_matching_recommendations(user_description, top_k, recommender)
# 使用語意相似度推薦
recommendations = []
- user_embedding = recommender.sbert_model.encode(user_description)
+ user_embedding = recommender.vector_manager.encode_text(user_description)
# 計算所有品種的增強分數
all_breed_scores = []
@@ -1678,7 +656,7 @@ def get_enhanced_recommendations_with_unified_scoring(user_description: str, top
breed_info = get_dog_description(breed_name) or {}
# 計算增強的匹配分數
- enhanced_score = _calculate_enhanced_matching_score(
+ enhanced_score = recommender.score_calculator.calculate_enhanced_matching_score(
breed_name, breed_info, user_description, similarity
)
@@ -1718,461 +696,20 @@ def get_enhanced_recommendations_with_unified_scoring(user_description: str, top
print(traceback.format_exc())
raise RuntimeError(error_msg) from e
-def _calculate_enhanced_matching_score(breed: str, breed_info: dict, user_description: str, base_similarity: float) -> dict:
- """計算增強的匹配分數,基於用戶描述和品種特性"""
- try:
- user_desc = user_description.lower()
-
- # 分析用戶需求
- space_requirements = _analyze_space_requirements(user_desc)
- exercise_requirements = _analyze_exercise_requirements(user_desc)
- noise_requirements = _analyze_noise_requirements(user_desc)
- size_requirements = _analyze_size_requirements(user_desc)
- family_requirements = _analyze_family_requirements(user_desc)
-
- # 獲取品種特性
- breed_size = breed_info.get('Size', '').lower()
- breed_exercise = breed_info.get('Exercise Needs', '').lower()
- breed_noise = breed_noise_info.get(breed, {}).get('noise_level', 'moderate').lower()
- breed_temperament = breed_info.get('Temperament', '').lower()
- breed_good_with_children = breed_info.get('Good with Children', '').lower()
-
- # 計算各維度匹配分數
- dimension_scores = {}
-
- # 空間匹配 (30% 權重)
- space_score = _calculate_space_compatibility(space_requirements, breed_size, breed_exercise)
- dimension_scores['space'] = space_score
-
- # 運動需求匹配 (25% 權重)
- exercise_score = _calculate_exercise_compatibility(exercise_requirements, breed_exercise)
- dimension_scores['exercise'] = exercise_score
-
- # 噪音匹配 (20% 權重)
- noise_score = _calculate_noise_compatibility(noise_requirements, breed_noise)
- dimension_scores['noise'] = noise_score
-
- # 體型匹配 (15% 權重)
- size_score = _calculate_size_compatibility(size_requirements, breed_size)
- dimension_scores['grooming'] = min(0.9, base_similarity + 0.1) # 美容需求基於語意相似度
-
- # 家庭相容性 (10% 權重)
- family_score = _calculate_family_compatibility(family_requirements, breed_good_with_children, breed_temperament)
- dimension_scores['family'] = family_score
- dimension_scores['experience'] = min(0.9, base_similarity + 0.05) # 經驗需求基於語意相似度
-
- # 應用硬約束過濾
- constraint_penalty = _apply_hard_constraints_enhanced(user_desc, breed_info)
-
- # 計算加權總分 - 精確化維度權重配置
- # 根據指導建議重新平衡維度權重
- weighted_score = (
- space_score * 0.30 + # 空間相容性(降低5%)
- exercise_score * 0.28 + # 運動需求匹配(降低2%)
- noise_score * 0.18 + # 噪音控制(提升3%)
- family_score * 0.12 + # 家庭相容性(提升2%)
- size_score * 0.08 + # 體型匹配(降低2%)
- min(0.9, base_similarity + 0.1) * 0.04 # 護理需求(新增獨立權重)
- )
-
- # 優化完美匹配獎勵機制 - 降低觸發門檻並增加層次
- perfect_match_bonus = 0.0
- if space_score >= 0.88 and exercise_score >= 0.88 and noise_score >= 0.85:
- perfect_match_bonus = 0.08 # 卓越匹配獎勵
- elif space_score >= 0.82 and exercise_score >= 0.82 and noise_score >= 0.75:
- perfect_match_bonus = 0.04 # 優秀匹配獎勵
- elif space_score >= 0.75 and exercise_score >= 0.75:
- perfect_match_bonus = 0.02 # 良好匹配獎勵
-
- # 結合語意相似度與維度匹配 - 調整為75%維度匹配 25%語義相似度
- base_combined_score = (weighted_score * 0.75 + base_similarity * 0.25) + perfect_match_bonus
-
- # 應用漸進式約束懲罰,但確保基礎分數保障
- raw_final_score = base_combined_score + constraint_penalty
-
- # 實施動態分數保障機制 - 提升至40-42%基礎分數
- # 根據品種特性動態調整基礎分數
- base_guaranteed_score = 0.42 # 提升基礎保障分數
-
- # 特殊品種基礎分數調整
- high_adaptability_breeds = ['French_Bulldog', 'Pug', 'Golden_Retriever', 'Labrador_Retriever']
- if any(breed in breed for breed in high_adaptability_breeds):
- base_guaranteed_score = 0.45 # 高適應性品種更高基礎分數
-
- # 動態分數分佈優化
- if raw_final_score >= base_guaranteed_score:
- # 對於高分品種,實施適度壓縮避免過度集中
- if raw_final_score > 0.85:
- compression_factor = 0.92 # 輕度壓縮高分
- final_score = 0.85 + (raw_final_score - 0.85) * compression_factor
- else:
- final_score = raw_final_score
- final_score = min(0.93, final_score) # 降低最高分數限制
- else:
- # 對於低分品種,使用改進的保障機制
- normalized_raw_score = max(0.15, raw_final_score)
- # 基礎保障75% + 實際計算25%,保持一定區分度
- final_score = base_guaranteed_score * 0.75 + normalized_raw_score * 0.25
- final_score = max(base_guaranteed_score, min(0.93, final_score))
-
- lifestyle_bonus = max(0.0, weighted_score - base_similarity)
-
- return {
- 'final_score': final_score,
- 'weighted_score': weighted_score,
- 'lifestyle_bonus': lifestyle_bonus,
- 'dimension_scores': dimension_scores,
- 'constraint_penalty': constraint_penalty
- }
-
- except Exception as e:
- print(f"Error in enhanced matching calculation for {breed}: {str(e)}")
- return {
- 'final_score': base_similarity,
- 'weighted_score': base_similarity,
- 'lifestyle_bonus': 0.0,
- 'dimension_scores': {
- 'space': base_similarity * 0.9,
- 'exercise': base_similarity * 0.85,
- 'grooming': base_similarity * 0.8,
- 'experience': base_similarity * 0.75,
- 'noise': base_similarity * 0.7,
- 'family': base_similarity * 0.65
- },
- 'constraint_penalty': 0.0
- }
-
-def _analyze_space_requirements(user_desc: str) -> dict:
- """分析空間需求 - 增強中等活動量識別"""
- requirements = {'type': 'unknown', 'size': 'medium', 'importance': 0.5}
-
- if any(word in user_desc for word in ['apartment', 'small apartment', 'small space', 'condo', 'flat']):
- requirements['type'] = 'apartment'
- requirements['size'] = 'small'
- requirements['importance'] = 0.95 # 提高重要性
- elif any(word in user_desc for word in ['medium-sized house', 'medium house', 'townhouse']):
- requirements['type'] = 'medium_house'
- requirements['size'] = 'medium'
- requirements['importance'] = 0.8 # 中等活動量用戶的特殊標記
- elif any(word in user_desc for word in ['large house', 'big house', 'yard', 'garden', 'large space', 'backyard']):
- requirements['type'] = 'house'
- requirements['size'] = 'large'
- requirements['importance'] = 0.7
-
- return requirements
-
-def _analyze_exercise_requirements(user_desc: str) -> dict:
- """分析運動需求 - 增強中等活動量識別"""
- requirements = {'level': 'moderate', 'importance': 0.5}
-
- # 低運動量識別
- if any(word in user_desc for word in ["don't exercise", "don't exercise much", "low exercise", "minimal", "lazy", "not active"]):
- requirements['level'] = 'low'
- requirements['importance'] = 0.95
- # 中等運動量的精確識別
- elif any(phrase in user_desc for phrase in ['30 minutes', 'half hour', 'moderate', 'balanced', 'walk about']):
- if 'walk' in user_desc or 'daily' in user_desc:
- requirements['level'] = 'moderate'
- requirements['importance'] = 0.85 # 中等活動量的特殊標記
- # 高運動量識別
- elif any(word in user_desc for word in ['active', 'hiking', 'outdoor activities', 'running', 'outdoors', 'love hiking']):
- requirements['level'] = 'high'
- requirements['importance'] = 0.9
-
- return requirements
-
-def _analyze_noise_requirements(user_desc: str) -> dict:
- """分析噪音需求"""
- requirements = {'tolerance': 'medium', 'importance': 0.5}
-
- if any(word in user_desc for word in ['quiet', 'no bark', "won't bark", "doesn't bark", 'silent', 'peaceful']):
- requirements['tolerance'] = 'low'
- requirements['importance'] = 0.9
- elif any(word in user_desc for word in ['loud', 'barking ok', 'noise ok']):
- requirements['tolerance'] = 'high'
- requirements['importance'] = 0.7
-
- return requirements
-
-def _analyze_size_requirements(user_desc: str) -> dict:
- """分析體型需求"""
- requirements = {'preferred': 'any', 'importance': 0.5}
-
- if any(word in user_desc for word in ['small', 'tiny', 'little', 'lap dog', 'compact']):
- requirements['preferred'] = 'small'
- requirements['importance'] = 0.8
- elif any(word in user_desc for word in ['large', 'big', 'giant']):
- requirements['preferred'] = 'large'
- requirements['importance'] = 0.8
-
- return requirements
-
-def _analyze_family_requirements(user_desc: str) -> dict:
- """分析家庭需求"""
- requirements = {'children': False, 'importance': 0.3}
-
- if any(word in user_desc for word in ['children', 'kids', 'family', 'child']):
- requirements['children'] = True
- requirements['importance'] = 0.8
-
- return requirements
-
-def _calculate_space_compatibility(space_req: dict, breed_size: str, breed_exercise: str) -> float:
- """計算空間相容性分數 - 增強中等活動量處理"""
- if space_req['type'] == 'apartment':
- if 'small' in breed_size or 'toy' in breed_size:
- base_score = 0.95
- elif 'medium' in breed_size:
- if 'low' in breed_exercise:
- base_score = 0.75
- else:
- base_score = 0.45 # 降低中型犬在公寓的分數
- elif 'large' in breed_size:
- base_score = 0.05 # 大型犬極度不適合公寓
- elif 'giant' in breed_size:
- base_score = 0.01 # 超大型犬完全不適合公寓
- else:
- base_score = 0.7
- elif space_req['type'] == 'medium_house':
- # 中型房屋的特殊處理 - 適合中等活動量用戶
- if 'small' in breed_size or 'toy' in breed_size:
- base_score = 0.9
- elif 'medium' in breed_size:
- base_score = 0.95 # 中型犬在中型房屋很適合
- elif 'large' in breed_size:
- if 'moderate' in breed_exercise or 'low' in breed_exercise:
- base_score = 0.8 # 低運動量大型犬還可以
- else:
- base_score = 0.6 # 高運動量大型犬不太適合
- elif 'giant' in breed_size:
- base_score = 0.3 # 超大型犬在中型房屋不太適合
- else:
- base_score = 0.85
- else:
- # 大型房屋的情況
- if 'small' in breed_size or 'toy' in breed_size:
- base_score = 0.85
- elif 'medium' in breed_size:
- base_score = 0.9
- elif 'large' in breed_size or 'giant' in breed_size:
- base_score = 0.95
- else:
- base_score = 0.8
-
- return min(0.95, base_score)
-
-def _calculate_exercise_compatibility(exercise_req: dict, breed_exercise: str) -> float:
- """計算運動需求相容性分數 - 增強中等活動量處理"""
- if exercise_req['level'] == 'low':
- if 'low' in breed_exercise or 'minimal' in breed_exercise:
- return 0.95
- elif 'moderate' in breed_exercise:
- return 0.5 # 降低不匹配分數
- elif 'high' in breed_exercise:
- return 0.1 # 進一步降低高運動需求的匹配
- else:
- return 0.7
- elif exercise_req['level'] == 'high':
- if 'high' in breed_exercise:
- return 0.95
- elif 'moderate' in breed_exercise:
- return 0.8
- elif 'low' in breed_exercise:
- return 0.6
- else:
- return 0.7
- else: # moderate - 中等活動量的精確處理
- if 'moderate' in breed_exercise:
- return 0.95 # 完美匹配
- elif 'low' in breed_exercise:
- return 0.85 # 低運動需求的品種對中等活動量用戶也不錯
- elif 'high' in breed_exercise:
- return 0.5 # 中等活動量用戶不太適合高運動需求品種
- else:
- return 0.75
-
- return 0.6
-
-def _calculate_noise_compatibility(noise_req: dict, breed_noise: str) -> float:
- """計算噪音相容性分數,更好處理複合等級"""
- breed_noise_lower = breed_noise.lower()
-
- if noise_req['tolerance'] == 'low':
- if 'low' in breed_noise_lower and 'moderate' not in breed_noise_lower:
- return 0.95 # 純低噪音
- elif 'low-moderate' in breed_noise_lower or 'low to moderate' in breed_noise_lower:
- return 0.8 # 低到中等噪音,還可接受
- elif breed_noise_lower in ['moderate']:
- return 0.4 # 中等噪音有些問題
- elif 'high' in breed_noise_lower:
- return 0.1 # 高噪音不適合
- else:
- return 0.6 # 未知噪音水平,保守估計
- elif noise_req['tolerance'] == 'high':
- if 'high' in breed_noise_lower:
- return 0.9
- elif 'moderate' in breed_noise_lower:
- return 0.85
- elif 'low' in breed_noise_lower:
- return 0.8 # 安靜犬對高容忍度的人也很好
- else:
- return 0.8
- else: # moderate tolerance
- if 'moderate' in breed_noise_lower:
- return 0.9
- elif 'low' in breed_noise_lower:
- return 0.85
- elif 'high' in breed_noise_lower:
- return 0.6
- else:
- return 0.75
-
- return 0.7
-
-def _calculate_size_compatibility(size_req: dict, breed_size: str) -> float:
- """計算體型相容性分數"""
- if size_req['preferred'] == 'small':
- if any(word in breed_size for word in ['small', 'toy', 'tiny']):
- return 0.9
- elif 'medium' in breed_size:
- return 0.6
- else:
- return 0.3
- elif size_req['preferred'] == 'large':
- if any(word in breed_size for word in ['large', 'giant']):
- return 0.9
- elif 'medium' in breed_size:
- return 0.7
- else:
- return 0.4
-
- return 0.7 # 無特別偏好
-
-def _calculate_family_compatibility(family_req: dict, good_with_children: str, temperament: str) -> float:
- """計算家庭相容性分數"""
- if family_req['children']:
- if 'yes' in good_with_children.lower():
- return 0.9
- elif any(word in temperament for word in ['gentle', 'patient', 'friendly']):
- return 0.8
- elif 'no' in good_with_children.lower():
- return 0.2
- else:
- return 0.6
-
- return 0.7
-
-def _apply_hard_constraints_enhanced(user_desc: str, breed_info: dict) -> float:
- """應用品種特性感知的動態懲罰機制"""
- penalty = 0.0
-
- # 建立懲罰衰減係數和補償機制
- penalty_decay_factor = 0.7
- breed_adaptability_bonus = 0.0
- breed_size = breed_info.get('Size', '').lower()
- breed_exercise = breed_info.get('Exercise Needs', '').lower()
- breed_name = breed_info.get('Breed', '').replace(' ', '_')
-
- # 公寓空間約束 - 品種特性感知懲罰機制
- if 'apartment' in user_desc or 'small apartment' in user_desc:
- if 'giant' in breed_size:
- base_penalty = -0.35 # 減少基礎懲罰
- # 特定品種適應性補償
- adaptable_giants = ['Mastiff', 'Great Dane'] # 相對安靜的巨型犬
- if any(adapt_breed in breed_name for adapt_breed in adaptable_giants):
- breed_adaptability_bonus += 0.08
- penalty += base_penalty * penalty_decay_factor
- elif 'large' in breed_size:
- base_penalty = -0.25 # 減少大型犬懲罰
- # 適合公寓的大型犬補償
- apartment_friendly_large = ['Greyhound', 'Great_Dane']
- if any(apt_breed in breed_name for apt_breed in apartment_friendly_large):
- breed_adaptability_bonus += 0.06
- penalty += base_penalty * penalty_decay_factor
- elif 'medium' in breed_size and 'high' in breed_exercise:
- penalty += -0.15 * penalty_decay_factor # 進一步減少懲罰
-
- # 運動需求不匹配 - 品種特性感知懲罰機制
- if any(phrase in user_desc for phrase in ["don't exercise", "not active", "low exercise", "don't exercise much"]):
- if 'high' in breed_exercise:
- base_penalty = -0.28 # 減少基礎懲罰
- # 低維護高運動犬種補償
- adaptable_high_energy = ['Greyhound', 'Whippet'] # 運動爆發型,平時安靜
- if any(adapt_breed in breed_name for adapt_breed in adaptable_high_energy):
- breed_adaptability_bonus += 0.10
- penalty += base_penalty * penalty_decay_factor
- elif 'moderate' in breed_exercise:
- penalty += -0.08 * penalty_decay_factor # 進一步減少懲罰
-
- # 噪音控制需求不匹配 - 品種特性感知懲罰機制
- if any(phrase in user_desc for phrase in ['quiet', "won't bark", "doesn't bark", "silent"]):
- breed_noise = breed_noise_info.get(breed_name, {}).get('noise_level', 'moderate').lower()
- if 'high' in breed_noise:
- base_penalty = -0.18 # 減少基礎懲罰
- # 訓練性良好的高噪音品種補償
- trainable_vocal_breeds = ['German_Shepherd', 'Golden_Retriever']
- if any(train_breed in breed_name for train_breed in trainable_vocal_breeds):
- breed_adaptability_bonus += 0.05
- penalty += base_penalty * penalty_decay_factor
- elif 'moderate' in breed_noise and 'low' not in breed_noise:
- penalty += -0.05 * penalty_decay_factor
-
- # 體型偏好不匹配 - 漸進式懲罰
- if any(phrase in user_desc for phrase in ['small', 'tiny', 'little']):
- if 'giant' in breed_size:
- penalty -= 0.35 # 超大型犬懲罰
- elif 'large' in breed_size:
- penalty -= 0.20 # 大型犬懲罰
-
- # 中等活動量用戶的特殊約束處理 - 漸進式懲罰
- moderate_activity_terms = ['30 minutes', 'half hour', 'moderate', 'balanced', 'medium-sized house']
- if any(term in user_desc for term in moderate_activity_terms):
- # 超大型犬對中等活動量用戶的適度懲罰
- giant_breeds = ['Saint Bernard', 'Tibetan Mastiff', 'Great Dane', 'Mastiff', 'Newfoundland']
- if any(giant in breed_name for giant in giant_breeds) or 'giant' in breed_size:
- penalty -= 0.35 # 適度懲罰,不完全排除
-
- # 中型房屋 + 超大型犬的額外考量
- if 'medium-sized house' in user_desc and any(giant in breed_name for giant in giant_breeds):
- if not any(high_activity in user_desc for high_activity in ['hiking', 'running', 'active', 'outdoor activities']):
- penalty -= 0.15 # 輕度額外懲罰
-
- # 30分鐘散步對極高運動需求品種的懲罰
- if any(term in user_desc for term in ['30 minutes', 'half hour']) and 'walk' in user_desc:
- high_energy_breeds = ['Siberian Husky', 'Border Collie', 'Jack Russell Terrier', 'Weimaraner']
- if any(he_breed in breed_name for he_breed in high_energy_breeds) and 'high' in breed_exercise:
- penalty -= 0.25 # 適度懲罰極高運動需求品種
-
- # 添加特殊品種適應性補償機制
- # 對於邊界適配品種,給予適度補償
- boundary_adaptable_breeds = {
- 'Italian_Greyhound': 0.08, # 安靜、低維護的小型犬
- 'Boston_Bull': 0.06, # 適應性強的小型犬
- 'Havanese': 0.05, # 友好適應的小型犬
- 'Silky_terrier': 0.04, # 安靜的玩具犬
- 'Basset': 0.07 # 低能量但友好的中型犬
- }
-
- if breed_name in boundary_adaptable_breeds:
- breed_adaptability_bonus += boundary_adaptable_breeds[breed_name]
-
- # 應用品種適應性補償並設置懲罰上限
- final_penalty = penalty + breed_adaptability_bonus
- # 限制最大懲罰,避免單一約束主導評分
- final_penalty = max(-0.4, final_penalty)
-
- return final_penalty
-
-def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]:
+def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15, recommender=None) -> List[Dict[str, Any]]:
"""基本文字匹配推薦(SBERT 不可用時的後備方案)"""
try:
print("Using basic text matching as fallback...")
+ # 如果沒有提供 recommender,創建一個新的
+ if recommender is None:
+ recommender = SemanticBreedRecommender()
+
# 基本關鍵字匹配
keywords = user_description.lower().split()
breed_scores = []
- # 從數據庫獲取品種清單
+ # 從數據庫獲取品種清單或使用預設清單
try:
conn = sqlite3.connect('animal_detector.db')
cursor = conn.cursor()
@@ -2180,13 +717,15 @@ def _get_basic_text_matching_recommendations(user_description: str, top_k: int =
basic_breeds = [row[0] for row in cursor.fetchall()]
cursor.close()
conn.close()
+ # 過濾掉野生動物品種
+ basic_breeds = [breed for breed in basic_breeds if breed != 'Dhole']
except Exception as e:
print(f"Could not load breed list from database: {str(e)}")
# 後備品種清單
basic_breeds = [
'Labrador_Retriever', 'Golden_Retriever', 'German_Shepherd', 'French_Bulldog',
'Border_Collie', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
- 'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih-Tzu',
+ 'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih_Tzu',
'Maltese_Dog', 'Chihuahua', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
'Japanese_Spaniel', 'Toy_Terrier', 'Affenpinscher', 'Pekingese', 'Lhasa'
]
@@ -2200,7 +739,7 @@ def _get_basic_text_matching_recommendations(user_description: str, top_k: int =
base_score = min(0.95, 0.3 + (matches / len(keywords)) * 0.6)
# 應用增強匹配邏輯
- enhanced_score = _calculate_enhanced_matching_score(
+ enhanced_score = recommender.score_calculator.calculate_enhanced_matching_score(
breed, breed_info, user_description, base_score
)
@@ -2243,4 +782,4 @@ def _get_basic_text_matching_recommendations(user_description: str, top_k: int =
except Exception as e:
error_msg = f"Error in basic text matching: {str(e)}"
print(f"ERROR: {error_msg}")
- raise RuntimeError(error_msg) from e
\ No newline at end of file
+ raise RuntimeError(error_msg) from e
diff --git a/semantic_vector_manager.py b/semantic_vector_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..b25498b064d9e7f928801fd865b8fd629eb96f94
--- /dev/null
+++ b/semantic_vector_manager.py
@@ -0,0 +1,385 @@
+import random
+import hashlib
+import numpy as np
+import sqlite3
+import re
+import traceback
+from typing import List, Dict, Tuple, Optional, Any
+from dataclasses import dataclass
+from sentence_transformers import SentenceTransformer
+import torch
+from sklearn.metrics.pairwise import cosine_similarity
+from dog_database import get_dog_description
+from breed_health_info import breed_health_info
+from breed_noise_info import breed_noise_info
+
+@dataclass
+class BreedDescriptionVector:
+    """Data structure holding one breed's description and its embedding."""
+    # Breed identifier as it appears in the breed list (e.g. 'Golden_Retriever').
+    breed_name: str
+    # Natural-language description text that was embedded.
+    description_text: str
+    # Vector returned by the SBERT model's encode() for description_text.
+    embedding: np.ndarray
+    # Selected breed attributes stored alongside the vector
+    # (size, exercise_needs, grooming_needs, good_with_children, temperament).
+    characteristics: Dict[str, Any]
+
+class SemanticVectorManager:
+    """
+    Semantic vector manager.
+
+    Handles SBERT model initialization, generation of natural-language breed
+    descriptions, and construction of the per-breed embedding vectors.
+    """
+
+    # Breeds used when the catalog database cannot be queried.
+    _FALLBACK_BREEDS = (
+        'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever',
+        'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
+    )
+
+    def __init__(self):
+        """Set up state; loading the SBERT model is deferred until first use."""
+        self.model_name = 'all-MiniLM-L6-v2'
+        self.sbert_model = None
+        self._sbert_loading_attempted = False
+        self.breed_vectors = {}
+        self.breed_list = self._get_breed_list()
+        # SBERT loading is postponed until it is needed so that it can run
+        # inside the GPU environment.
+        print("SemanticVectorManager initialized (SBERT loading deferred)")
+
+    def _get_breed_list(self) -> List[str]:
+        """
+        Return the breed names stored in the AnimalCatalog table.
+
+        'Dhole' (a wild species) and empty values are filtered out. If the
+        database cannot be queried, a short built-in list is returned.
+        """
+        conn = None
+        try:
+            conn = sqlite3.connect('animal_detector.db')
+            cursor = conn.cursor()
+            cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog")
+            breeds = [row[0] for row in cursor.fetchall()]
+            # Drop wild-animal entries (and NULL rows, which would break the
+            # string handling done on breed names later).
+            return [breed for breed in breeds if breed and breed != 'Dhole']
+        except Exception as e:
+            print(f"Error getting breed list: {str(e)}")
+            return list(self._FALLBACK_BREEDS)
+        finally:
+            # Close the connection on the error path too.
+            if conn is not None:
+                conn.close()
+
+    def _initialize_model(self):
+        """
+        Load the SBERT model on first call and return it (or None).
+
+        Candidate model names are tried in order and the first one that loads
+        is kept. Only one loading attempt is made per instance: later calls
+        return whatever the first attempt produced.
+        """
+        if self.sbert_model is not None or self._sbert_loading_attempted:
+            return self.sbert_model
+
+        try:
+            print("Loading SBERT model in GPU context...")
+            # Alternative model names to try if the primary one fails.
+            model_options = ['all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'all-MiniLM-L12-v2']
+
+            for model_name in model_options:
+                try:
+                    # Pick the device explicitly (needed for ZeroGPU setups).
+                    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+                    self.sbert_model = SentenceTransformer(model_name, device=device)
+                    self.model_name = model_name
+                    print(f"SBERT model {model_name} loaded successfully on {device}")
+                    return self.sbert_model
+                except Exception as model_e:
+                    print(f"Failed to load {model_name}: {str(model_e)}")
+                    continue
+
+            # Every candidate failed.
+            print("All SBERT models failed to load. Using basic text matching fallback.")
+            self.sbert_model = None
+            return None
+
+        except Exception as e:
+            print(f"Failed to initialize any SBERT model: {str(e)}")
+            print(traceback.format_exc())
+            print("Will provide basic text-based recommendations without embeddings")
+            self.sbert_model = None
+            return None
+        finally:
+            # Remember that loading was tried so it is never retried.
+            self._sbert_loading_attempted = True
+
+    @staticmethod
+    def _text_field(info: Dict[str, Any], key: str, default: str) -> str:
+        """Return info[key] when it is a non-blank string, otherwise default."""
+        value = info.get(key)
+        if isinstance(value, str) and value.strip():
+            return value
+        return default
+
+    def _create_breed_description(self, breed: str) -> str:
+        """
+        Build the natural-language description that gets embedded for a breed.
+
+        The text combines sentences about size, temperament, exercise, noise,
+        living space, grooming, family suitability, trainability, care level
+        and lifespan, followed by an "Additional context" keyword list.
+        Missing, blank or non-string fields fall back to per-field defaults.
+        Keywords are de-duplicated in first-seen order, so identical breed
+        data always produces identical text.
+
+        Args:
+            breed: Breed identifier with underscores, e.g. 'Golden_Retriever'.
+
+        Returns:
+            The description string, or a generic one-sentence description if
+            an error occurs while assembling it.
+        """
+        try:
+            # Gather the information sources.
+            breed_info = get_dog_description(breed) or {}
+            noise_info = breed_noise_info.get(breed, {}) if breed_noise_info else {}
+
+            breed_display_name = breed.replace('_', ' ')
+            size = self._text_field(breed_info, 'Size', 'medium').lower()
+            temperament = self._text_field(breed_info, 'Temperament', '')
+            exercise_needs = self._text_field(breed_info, 'Exercise Needs', 'moderate').lower()
+            noise_level = self._text_field(noise_info, 'noise_level', 'moderate').lower()
+            grooming_needs = self._text_field(breed_info, 'Grooming Needs', 'moderate').lower()
+            good_with_children = breed_info.get('Good with Children', 'Yes') == 'Yes'
+            raw_description = self._text_field(breed_info, 'Description', '')
+            care_level = self._text_field(breed_info, 'Care Level', 'moderate')
+            lifespan = self._text_field(breed_info, 'Lifespan', '10-12 years')
+
+            description_parts = []
+
+            # 1. Basic size and physical characteristics.
+            description_parts.append(f"{breed_display_name} is a {size} sized dog breed")
+
+            # 2. Temperament and personality.
+            if temperament:
+                description_parts.append(f"with a {temperament.lower()} temperament")
+
+            # 3. Exercise and activity level.
+            if 'high' in exercise_needs:
+                description_parts.append("requiring high daily exercise and mental stimulation")
+            elif 'low' in exercise_needs or 'minimal' in exercise_needs:
+                description_parts.append("with minimal exercise requirements, suitable for apartment living")
+            else:
+                description_parts.append("with moderate exercise needs")
+
+            # 4. Noise characteristics.
+            if 'low' in noise_level or 'quiet' in noise_level:
+                description_parts.append("known for being quiet and rarely barking")
+            elif 'high' in noise_level or 'loud' in noise_level:
+                description_parts.append("tends to be vocal and bark frequently")
+            else:
+                description_parts.append("with moderate barking tendencies")
+
+            # 5. Living-space compatibility.
+            if size in ['small', 'tiny']:
+                description_parts.append("excellent for small apartments and limited spaces")
+            elif size in ['large', 'giant']:
+                description_parts.append("requiring large living spaces and preferably a yard")
+            else:
+                description_parts.append("adaptable to various living situations")
+
+            # 6. Grooming and maintenance.
+            if 'high' in grooming_needs:
+                description_parts.append("requiring regular professional grooming")
+            elif 'low' in grooming_needs:
+                description_parts.append("with minimal grooming requirements")
+            else:
+                description_parts.append("with moderate grooming needs")
+
+            # 7. Family compatibility.
+            if good_with_children:
+                description_parts.append("excellent with children and families")
+            else:
+                description_parts.append("better suited for adult households")
+
+            # 8. Intelligence and trainability, mined from the free-text description.
+            intelligence_keywords = []
+            description_text = raw_description.lower()
+
+            if description_text:
+                if any(word in description_text for word in ['intelligent', 'smart', 'clever', 'quick to learn']):
+                    intelligence_keywords.extend(['highly intelligent', 'trainable', 'quick learner'])
+                elif any(word in description_text for word in ['stubborn', 'independent', 'difficult to train']):
+                    intelligence_keywords.extend(['independent minded', 'requires patience', 'challenging to train'])
+                else:
+                    intelligence_keywords.extend(['moderate intelligence', 'trainable with consistency'])
+
+                # Working / purpose traits.
+                if any(word in description_text for word in ['working', 'herding', 'guard', 'hunting']):
+                    intelligence_keywords.extend(['working breed', 'purpose-driven', 'task-oriented'])
+                elif any(word in description_text for word in ['companion', 'lap', 'toy', 'decorative']):
+                    intelligence_keywords.extend(['companion breed', 'affectionate', 'people-focused'])
+
+            # Only the first two phrases go into the sentence; all of them
+            # feed the keyword list further down.
+            if intelligence_keywords:
+                description_parts.append(f"characterized as {', '.join(intelligence_keywords[:2])}")
+
+            # 9. Notable traits, taken from the first 150 characters of the description.
+            if raw_description:
+                desc_lower = raw_description[:150].lower()
+                key_traits = []
+
+                if 'friendly' in desc_lower:
+                    key_traits.append('friendly')
+                if 'gentle' in desc_lower:
+                    key_traits.append('gentle')
+                if 'energetic' in desc_lower or 'active' in desc_lower:
+                    key_traits.append('energetic')
+                if 'calm' in desc_lower or 'peaceful' in desc_lower:
+                    key_traits.append('calm')
+                if 'protective' in desc_lower or 'guard' in desc_lower:
+                    key_traits.append('protective')
+
+                trait_text = f" and {', '.join(key_traits)}" if key_traits else ""
+                description_parts.append(f"Known for: {desc_lower}{trait_text}")
+
+            # 10. Overall care level.
+            description_parts.append(f"requiring {care_level.lower()} overall care level")
+
+            # 11. Lifespan.
+            description_parts.append(f"with a typical lifespan of {lifespan}")
+
+            # Assemble the narrative part.
+            full_description = '. '.join(description_parts) + '.'
+
+            # Extra keywords appended to improve semantic matching.
+            keywords = [word.lower() for word in breed_display_name.split()]
+
+            # Temperament keywords.
+            if temperament:
+                keywords.extend(word.lower().strip(',') for word in temperament.split())
+
+            # Size-based keywords.
+            if 'small' in size or 'tiny' in size:
+                keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap'])
+            elif 'large' in size or 'giant' in size:
+                keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor'])
+            else:
+                keywords.extend(['medium', 'moderate', 'average', 'balanced'])
+
+            # Activity-level keywords.
+            if 'high' in exercise_needs:
+                keywords.extend(['active', 'energetic', 'exercise', 'outdoor', 'hiking', 'running', 'athletic'])
+            elif 'low' in exercise_needs:
+                keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary'])
+            else:
+                keywords.extend(['moderate', 'balanced', 'walks', 'regular'])
+
+            # Noise-level keywords.
+            if 'quiet' in noise_level or 'low' in noise_level:
+                keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise'])
+            elif 'high' in noise_level or 'loud' in noise_level:
+                keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog'])
+
+            # Living-situation keywords.
+            if size in ['small', 'tiny'] and 'low' in exercise_needs:
+                keywords.extend(['apartment', 'city', 'urban', 'small-space'])
+            if size in ['large', 'giant'] or 'high' in exercise_needs:
+                keywords.extend(['house', 'yard', 'suburban', 'rural', 'space'])
+
+            # Family keywords.
+            if good_with_children:
+                keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle'])
+
+            # Intelligence / trainability keywords.
+            keywords.extend(word.lower() for phrase in intelligence_keywords for word in phrase.split())
+
+            # Grooming keywords.
+            if 'high' in grooming_needs:
+                keywords.extend(['high-maintenance', 'professional-grooming', 'daily-brushing', 'coat-care'])
+            elif 'low' in grooming_needs:
+                keywords.extend(['low-maintenance', 'minimal-grooming', 'easy-care', 'wash-and-go'])
+            else:
+                keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care'])
+
+            # Lifespan keywords: average the numbers found in strings such as
+            # "10-12 years" or "12-15 years".
+            years = re.findall(r'\d+', lifespan)
+            if years:
+                avg_years = sum(int(y) for y in years) / len(years)
+                if avg_years >= 14:
+                    keywords.extend(['long-lived', 'longevity', 'durable', 'healthy-lifespan'])
+                elif avg_years <= 8:
+                    keywords.extend(['shorter-lifespan', 'health-considerations', 'special-care'])
+                else:
+                    keywords.extend(['average-lifespan', 'moderate-longevity'])
+
+            # De-duplicate while keeping first-seen order: set() ordering of
+            # strings changes between interpreter runs, which would change
+            # the text handed to the embedding model.
+            unique_keywords = list(dict.fromkeys(keywords))
+            keyword_text = ' '.join(unique_keywords)
+            full_description += f" Additional context: {keyword_text}"
+
+            return full_description
+
+        except Exception as e:
+            print(f"Error creating description for {breed}: {str(e)}")
+            return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics."
+
+    def _build_breed_vectors(self):
+        """
+        Build the embedding record for every breed in the breed list.
+
+        Called lazily. Does nothing (besides logging) when no SBERT model
+        could be loaded. Any other failure is logged and re-raised.
+        """
+        try:
+            print("Building breed vector database...")
+
+            # Initialize the model if that has not happened yet.
+            if self.sbert_model is None:
+                self._initialize_model()
+
+            # Skip when the model is unavailable.
+            if self.sbert_model is None:
+                print("SBERT model not available, skipping vector building")
+                return
+
+            for breed in self.breed_list:
+                description = self._create_breed_description(breed)
+
+                # Generate the embedding vector.
+                embedding = self.sbert_model.encode(description, convert_to_tensor=False)
+
+                # Collect the breed characteristics stored next to the vector.
+                breed_info = get_dog_description(breed) or {}
+                characteristics = {
+                    'size': breed_info.get('Size', 'Medium'),
+                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
+                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
+                    'good_with_children': breed_info.get('Good with Children', 'Yes'),
+                    'temperament': breed_info.get('Temperament', '')
+                }
+
+                self.breed_vectors[breed] = BreedDescriptionVector(
+                    breed_name=breed,
+                    description_text=description,
+                    embedding=embedding,
+                    characteristics=characteristics
+                )
+
+            print(f"Successfully built {len(self.breed_vectors)} breed vectors")
+
+        except Exception as e:
+            print(f"Error building breed vectors: {str(e)}")
+            print(traceback.format_exc())
+            raise
+
+    def get_breed_vectors(self) -> Dict[str, 'BreedDescriptionVector']:
+        """Return all breed vectors, building them first if none exist yet."""
+        if not self.breed_vectors:
+            self._build_breed_vectors()
+        return self.breed_vectors
+
+    def get_sbert_model(self) -> Optional['SentenceTransformer']:
+        """Return the SBERT model, or None if it has not been loaded."""
+        return self.sbert_model
+
+    def get_breed_list(self) -> List[str]:
+        """Return the list of breed names."""
+        return self.breed_list
+
+    def is_model_available(self) -> bool:
+        """Return True when an SBERT model is currently loaded."""
+        return self.sbert_model is not None
+
+    def encode_text(self, text: str) -> np.ndarray:
+        """
+        Encode text with the SBERT model, loading the model if necessary.
+
+        Raises:
+            RuntimeError: If no SBERT model is available.
+        """
+        # Initialize the model if that has not happened yet.
+        if self.sbert_model is None:
+            self._initialize_model()
+
+        if self.sbert_model is None:
+            raise RuntimeError("SBERT model not available")
+        return self.sbert_model.encode(text, convert_to_tensor=False)
diff --git a/user_query_analyzer.py b/user_query_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4ee4b0aecdbfb78461d02f389b7044271b972de
--- /dev/null
+++ b/user_query_analyzer.py
@@ -0,0 +1,511 @@
+import random
+import hashlib
+import numpy as np
+import sqlite3
+import re
+import traceback
+from typing import List, Dict, Tuple, Optional, Any
+from dataclasses import dataclass
+from sentence_transformers import SentenceTransformer
+import torch
+from sklearn.metrics.pairwise import cosine_similarity
+from dog_database import get_dog_description
+from breed_health_info import breed_health_info
+from breed_noise_info import breed_noise_info
+from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores
+from query_understanding import QueryUnderstandingEngine, analyze_user_query
+from constraint_manager import ConstraintManager, apply_breed_constraints
+from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore
+from score_calibrator import ScoreCalibrator, calibrate_breed_scores
+from config_manager import get_config_manager, get_standardized_breed_data
+
+class UserQueryAnalyzer:
+    """
+    User query analyzer.
+
+    Handles analysis of free-text user input: breed mentions and comparative
+    preferences, lifestyle keyword extraction, search keywords, a simple
+    sentiment score, and conversion of the findings into UserPreferences.
+
+    Term matching is anchored at the start of a word, so inflected forms
+    ('walk' -> 'walking') still match while mid-word hits ('old' inside
+    'golden', 'urban' inside 'suburban') do not. The guards are ASCII-only so
+    that the non-ASCII terms in the lists still match inside running text.
+    """
+
+    # "Not preceded / not followed by an ASCII word character."
+    _WORD_START = r'(?<![A-Za-z0-9_])'
+    _WORD_END = r'(?![A-Za-z0-9_])'
+
+    # Result keys of extract_lifestyle_keywords, in output order.
+    _LIFESTYLE_CATEGORIES = (
+        'living_space', 'activity_level', 'family_situation', 'noise_preference',
+        'size_preference', 'care_level', 'special_needs', 'intelligence_preference',
+        'grooming_preference', 'lifespan_preference', 'temperament_preference',
+        'experience_level',
+    )
+
+    # (category, label, trigger terms). Labels are appended in this order.
+    _LIFESTYLE_RULES = (
+        # Living space
+        ('living_space', 'apartment',
+         ('apartment', 'flat', 'condo', 'small space', 'city living', 'urban', 'no yard', 'indoor')),
+        ('living_space', 'house',
+         ('house', 'yard', 'garden', 'backyard', 'large space', 'suburban', 'rural', 'farm')),
+        # Activity level
+        ('activity_level', 'high',
+         ('active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor', 'sports', 'jogging',
+          'athletic', 'adventure', 'vigorous', 'high energy', 'workout')),
+        ('activity_level', 'low',
+         ('calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed',
+          'peaceful', 'quiet lifestyle', 'minimal exercise')),
+        ('activity_level', 'moderate',
+         ('moderate', 'walk', 'daily walks', 'light exercise')),
+        # Family situation ('grandchild'/'grandkid' keep the compound forms
+        # that plain substring matching used to catch).
+        ('family_situation', 'children',
+         ('children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage', 'school age',
+          'grandchild', 'grandkid')),
+        ('family_situation', 'elderly',
+         ('elderly', 'senior', 'old', 'retirement', 'aged', 'mature')),
+        ('family_situation', 'single',
+         ('single', 'alone', 'individual', 'solo', 'myself')),
+        # Noise preference
+        ('noise_preference', 'low',
+         ('quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise',
+          'soft-spoken', 'calm', 'tranquil')),
+        ('noise_preference', 'high',
+         ('loud', 'barking ok', 'noise tolerant', 'vocal', 'doesn\'t matter')),
+        # Size preference
+        ('size_preference', 'small',
+         ('small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog')),
+        ('size_preference', 'large',
+         ('large', 'big', 'giant', 'huge', 'massive', 'great')),
+        ('size_preference', 'medium',
+         ('medium', 'moderate size', 'average', 'mid-sized')),
+        # Care level
+        ('care_level', 'low',
+         ('low maintenance', 'easy care', 'simple', 'minimal grooming', 'wash and go')),
+        ('care_level', 'high',
+         ('high maintenance', 'grooming', 'care intensive', 'professional grooming', 'daily brushing')),
+        # Intelligence preference
+        ('intelligence_preference', 'high',
+         ('smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train',
+          'trainable', 'genius', 'brilliant')),
+        ('intelligence_preference', 'independent',
+         ('independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves')),
+        # Grooming preference
+        ('grooming_preference', 'low',
+         ('low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat')),
+        ('grooming_preference', 'high',
+         ('high grooming', 'professional grooming', 'lots of care', 'high maintenance coat',
+          'daily brushing', 'regular grooming')),
+        # Lifespan preference
+        ('lifespan_preference', 'long',
+         ('long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity')),
+        ('lifespan_preference', 'healthy',
+         ('healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution')),
+        # Temperament preference
+        ('temperament_preference', 'gentle',
+         ('gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile')),
+        ('temperament_preference', 'playful',
+         ('playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy')),
+        ('temperament_preference', 'protective',
+         ('protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive')),
+        ('temperament_preference', 'friendly',
+         ('friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious')),
+        # Experience level
+        ('experience_level', 'beginner',
+         ('first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced')),
+        ('experience_level', 'advanced',
+         ('experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned')),
+        # Special needs
+        ('special_needs', 'guard',
+         ('guard', 'protection', 'security', 'watchdog', 'protective', 'defender')),
+        ('special_needs', 'companion',
+         ('therapy', 'emotional support', 'companion', 'comfort', 'lap dog', 'cuddly')),
+        ('special_needs', 'hypoallergenic',
+         ('hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly', 'no shed')),
+        ('special_needs', 'multi_pet',
+         ('good with cats', 'cat friendly', 'multi-pet', 'other animals')),
+    )
+
+    # Two-word phrase patterns collected by _extract_phrases.
+    _PHRASE_PATTERNS = (
+        r'good with \w+',
+        r'apartment \w+',
+        r'family \w+',
+        r'exercise \w+',
+        r'grooming \w+',
+        r'noise \w+',
+        r'training \w+',
+        r'health \w+',
+        r'\w+ friendly',
+        r'\w+ tolerant',
+        r'\w+ maintenance',
+        r'\w+ energy',
+        r'\w+ barking',
+        r'\w+ shedding',
+    )
+
+    def __init__(self, breed_list: List[str]):
+        """
+        Initialize the analyzer.
+
+        Args:
+            breed_list: Breed identifiers with underscores (e.g. 'Border_Collie').
+        """
+        self.breed_list = breed_list
+        # Preference keyword -> score assigned to a breed mentioned next to it.
+        self.comparative_keywords = {
+            'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8,
+            'then': 0.7, 'second': 0.7, 'followed': 0.6,
+            'third': 0.5, 'least': 0.3, 'dislike': 0.2
+        }
+        self.stop_words = {
+            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
+            'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below',
+            'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
+            'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'i', 'me', 'my', 'myself',
+            'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he',
+            'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they',
+            'them', 'their', 'theirs', 'themselves'
+        }
+
+    @classmethod
+    def _mentions_any(cls, text: str, terms) -> bool:
+        """Return True if any of terms occurs in text starting at a word boundary."""
+        alternatives = '|'.join(re.escape(term) for term in terms)
+        return re.search(f'{cls._WORD_START}(?:{alternatives})', text) is not None
+
+    @classmethod
+    def _whole_word(cls, word: str) -> str:
+        """Return regex source matching word as a whole word, optionally pluralised."""
+        return f'{cls._WORD_START}{re.escape(word)}(?:s|es)?{cls._WORD_END}'
+
+    def _find_breed_mention(self, text: str, breed: str):
+        """
+        Return the regex match of the first word of the breed's name that
+        appears in text as a whole word, or None if no word appears.
+
+        NOTE(review): any single word of the name counts as a mention, so
+        generic words such as 'dog' or 'great' still over-match; narrowing
+        that needs a product decision and is left unchanged here.
+        """
+        for word in breed.replace('_', ' ').lower().split():
+            match = re.search(self._whole_word(word), text)
+            if match:
+                return match
+        return None
+
+    def parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
+        """
+        Parse comparative preference statements.
+
+        For every breed mentioned in the input, the preference keyword closest
+        to the mention (within 50 characters of where the mention starts)
+        decides the score; on a tie the keyword listed first in
+        comparative_keywords wins. A mentioned breed with no keyword nearby
+        gets 0.5.
+
+        Returns:
+            Mapping of mentioned breed identifier to preference score.
+        """
+        breed_scores = {}
+
+        # Normalize the input.
+        text = user_input.lower()
+
+        for breed in self.breed_list:
+            mention = self._find_breed_mention(text, breed)
+            if mention is None:
+                continue
+
+            # Context window around the word that actually matched.
+            window_start = max(0, mention.start() - 50)
+            window_end = min(len(text), mention.start() + 50)
+            context = text[window_start:window_end]
+            mention_start = mention.start() - window_start
+            mention_end = min(mention.end(), window_end) - window_start
+
+            breed_score = 0.5  # default score
+            nearest_gap = None
+            for keyword, score in self.comparative_keywords.items():
+                for hit in re.finditer(self._whole_word(keyword), context):
+                    if hit.end() <= mention_start:
+                        gap = mention_start - hit.end()
+                    elif hit.start() >= mention_end:
+                        gap = hit.start() - mention_end
+                    else:
+                        gap = 0
+                    # Nearest keyword wins, so negative keywords such as
+                    # 'dislike' can lower the score below the default.
+                    if nearest_gap is None or gap < nearest_gap:
+                        nearest_gap, breed_score = gap, score
+
+            breed_scores[breed] = breed_score
+
+        return breed_scores
+
+    def extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
+        """
+        Extract lifestyle keywords from the input.
+
+        Returns:
+            A dict with one list per category in _LIFESTYLE_CATEGORIES. Each
+            list holds the labels whose trigger terms occur in the input.
+        """
+        keywords = {category: [] for category in self._LIFESTYLE_CATEGORIES}
+        text = user_input.lower()
+
+        for category, label, terms in self._LIFESTYLE_RULES:
+            if self._mentions_any(text, terms):
+                keywords[category].append(label)
+
+        return keywords
+
+    def preprocess_text(self, text: str) -> str:
+        """Lower-case text, replace special characters with spaces, collapse whitespace."""
+        # Convert to lower case.
+        text = text.lower()
+
+        # Keep word characters, whitespace, hyphens and apostrophes only.
+        text = re.sub(r'[^\w\s\-\']', ' ', text)
+
+        # Normalize whitespace.
+        text = ' '.join(text.split())
+
+        return text
+
+    def generate_search_keywords(self, text: str) -> List[str]:
+        """
+        Generate keywords for semantic search.
+
+        Args:
+            text: Input text.
+
+        Returns:
+            Words longer than two characters that are not stop words,
+            followed by extracted phrases, de-duplicated in first-seen order.
+        """
+        text = self.preprocess_text(text)
+
+        # Tokenize and drop stop words.
+        keywords = [
+            word for word in text.split()
+            if len(word) > 2 and word not in self.stop_words
+        ]
+
+        # Add important phrases.
+        keywords.extend(self._extract_phrases(text))
+
+        # Remove duplicates while keeping a stable order.
+        return list(dict.fromkeys(keywords))
+
+    def _extract_phrases(self, text: str) -> List[str]:
+        """
+        Extract important phrases.
+
+        Args:
+            text: Input text.
+
+        Returns:
+            All matches of the patterns in _PHRASE_PATTERNS, pattern by pattern.
+        """
+        phrases = []
+        for pattern in self._PHRASE_PATTERNS:
+            phrases.extend(re.findall(pattern, text))
+        return phrases
+
+    def analyze_sentiment(self, text: str) -> Dict[str, float]:
+        """
+        Analyze the sentiment of the text with a simple word-list approach.
+
+        Negative expressions are counted and removed first, so the "want" in
+        "don't want" is not also counted as positive. Matching is done on
+        whole words or phrases, so punctuation next to a word does not
+        prevent a match.
+
+        Args:
+            text: Input text.
+
+        Returns:
+            Dict with 'positive', 'negative' and 'neutral' shares, each count
+            divided by the number of whitespace-separated words. For empty
+            input: positive 0.5, negative 0.5, neutral 0.0.
+        """
+        positive_words = [
+            'love', 'like', 'prefer', 'enjoy', 'want', 'need', 'looking for',
+            'good', 'great', 'excellent', 'perfect', 'wonderful', 'amazing'
+        ]
+
+        negative_words = [
+            'hate', 'dislike', 'avoid', 'don\'t want', 'no', 'not',
+            'bad', 'terrible', 'awful', 'horrible', 'worst', 'never'
+        ]
+
+        # Treat the typographic apostrophe like the plain one.
+        lowered = text.lower().replace('\u2019', "'")
+        total_words = len(lowered.split())
+
+        if total_words == 0:
+            return {'positive': 0.5, 'negative': 0.5, 'neutral': 0.0}
+
+        def lexicon_pattern(terms):
+            # Longest entries first so multi-word phrases win over their parts.
+            ordered = sorted(terms, key=len, reverse=True)
+            alternatives = '|'.join(re.escape(term) for term in ordered)
+            return f'{self._WORD_START}(?:{alternatives}){self._WORD_END}'
+
+        remainder, negative_count = re.subn(lexicon_pattern(negative_words), ' ', lowered)
+        positive_count = len(re.findall(lexicon_pattern(positive_words), remainder))
+
+        positive_score = positive_count / total_words
+        negative_score = negative_count / total_words
+        neutral_score = max(0, 1 - positive_score - negative_score)
+
+        return {
+            'positive': positive_score,
+            'negative': negative_score,
+            'neutral': neutral_score
+        }
+
+    def parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
+        """
+        Parse coarse user requirements from the input.
+
+        Explicit housing phrases ('apartment', 'large house', 'big house',
+        'medium(-sized) house') take precedence over the bare words 'small',
+        'big' and 'medium', and a 'medium(-sized) house' is not read as a
+        medium dog size.
+
+        Returns:
+            Dict with keys 'living_space', 'exercise_level', 'preferred_size'
+            and 'noise_tolerance'; values are None when nothing was detected.
+            'noise_tolerance' is never populated by this method.
+        """
+        requirements = {
+            'living_space': None,
+            'exercise_level': None,
+            'preferred_size': None,
+            'noise_tolerance': None
+        }
+
+        input_lower = user_input.lower()
+        medium_house_pattern = r'medium(?:[- ]sized)? house'
+
+        # Living space: explicit phrases first, bare size words as fallback.
+        if 'apartment' in input_lower:
+            requirements['living_space'] = 'apartment'
+        elif 'large house' in input_lower or 'big house' in input_lower:
+            requirements['living_space'] = 'large_house'
+        elif re.search(medium_house_pattern, input_lower):
+            requirements['living_space'] = 'medium_house'
+        elif 'small' in input_lower:
+            requirements['living_space'] = 'apartment'
+        elif 'big' in input_lower:
+            requirements['living_space'] = 'large_house'
+        elif 'medium' in input_lower:
+            requirements['living_space'] = 'medium_house'
+
+        # Exercise level ('active' must start a word, so 'inactive' is ignored).
+        if "don't exercise" in input_lower or 'low exercise' in input_lower:
+            requirements['exercise_level'] = 'low'
+        elif self._mentions_any(input_lower, ['hiking', 'running', 'active']):
+            requirements['exercise_level'] = 'high'
+        elif '30 minutes' in input_lower or 'moderate' in input_lower:
+            requirements['exercise_level'] = 'moderate'
+
+        # Size preference; a 'medium house' phrase says nothing about the dog.
+        size_text = re.sub(medium_house_pattern, ' ', input_lower)
+        if any(term in input_lower for term in ['small dog', 'tiny', 'toy']):
+            requirements['preferred_size'] = 'small'
+        elif any(term in input_lower for term in ['large dog', 'big dog']):
+            requirements['preferred_size'] = 'large'
+        elif 'medium' in size_text:
+            requirements['preferred_size'] = 'medium'
+
+        return requirements
+
+    def analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
+        """
+        Analyze a user description.
+
+        Returns:
+            Dict with 'mentioned_breeds', 'lifestyle_keywords' (left empty
+            here), 'preference_strength' per mentioned breed,
+            'constraint_requirements' and 'user_context'.
+        """
+        text = user_description.lower()
+        analysis = {
+            'mentioned_breeds': [],
+            'lifestyle_keywords': {},
+            'preference_strength': {},
+            'constraint_requirements': [],
+            'user_context': {}
+        }
+
+        # Simple preference strength: the same value for every mentioned breed.
+        expresses_liking = self._mentions_any(text, ['love', 'prefer', 'like', '喜歡', '最愛'])
+
+        # Extract mentioned breeds.
+        for breed in self.breed_list:
+            if self._find_breed_mention(text, breed) is not None:
+                analysis['mentioned_breeds'].append(breed)
+                analysis['preference_strength'][breed] = 0.8 if expresses_liking else 0.5
+
+        # Extract constraint requirements.
+        if self._mentions_any(text, ['quiet', 'silent', 'no barking', '安靜']):
+            analysis['constraint_requirements'].append('low_noise')
+        if self._mentions_any(text, ['apartment', 'small space', '公寓']):
+            analysis['constraint_requirements'].append('apartment_suitable')
+        if self._mentions_any(text, ['children', 'kids', 'family', '小孩']):
+            analysis['constraint_requirements'].append('child_friendly')
+
+        # Extract user context.
+        analysis['user_context'] = {
+            'has_children': self._mentions_any(text, ['children', 'kids', '小孩']),
+            'living_space': 'apartment' if self._mentions_any(text, ['apartment', '公寓']) else 'house',
+            'activity_level': 'high' if self._mentions_any(text, ['active', 'energetic', '活躍']) else 'moderate',
+            'noise_sensitive': self._mentions_any(text, ['quiet', 'silent', '安靜']),
+            'experience_level': 'beginner' if self._mentions_any(text, ['first time', 'beginner', '新手']) else 'intermediate'
+        }
+
+        return analysis
+
+    def create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> 'UserPreferences':
+        """
+        Create a UserPreferences object from an analysis result.
+
+        Args:
+            analysis: Dict containing a 'user_context' entry as produced by
+                analyze_user_description_enhanced.
+        """
+        context = analysis['user_context']
+
+        # Infer living-space type.
+        living_space = 'apartment' if context.get('living_space') == 'apartment' else 'house_small'
+
+        # Infer yard access.
+        yard_access = 'no_yard' if living_space == 'apartment' else 'shared_yard'
+
+        # Infer exercise time and exercise type from the activity level.
+        activity_level = context.get('activity_level', 'moderate')
+        exercise_time_map = {'high': 120, 'moderate': 60, 'low': 30}
+        exercise_time = exercise_time_map.get(activity_level, 60)
+
+        exercise_type_map = {'high': 'active_training', 'moderate': 'moderate_activity', 'low': 'light_walks'}
+        exercise_type = exercise_type_map.get(activity_level, 'moderate_activity')
+
+        # Infer noise tolerance.
+        noise_tolerance = 'low' if context.get('noise_sensitive', False) else 'medium'
+
+        has_children = context.get('has_children', False)
+
+        return UserPreferences(
+            living_space=living_space,
+            yard_access=yard_access,
+            exercise_time=exercise_time,
+            exercise_type=exercise_type,
+            grooming_commitment='medium',
+            experience_level=context.get('experience_level', 'intermediate'),
+            time_availability='moderate',
+            has_children=has_children,
+            children_age='school_age' if has_children else None,
+            noise_tolerance=noise_tolerance,
+            space_for_play=(living_space != 'apartment'),
+            other_pets=False,
+            climate='moderate',
+            health_sensitivity='medium',
+            barking_acceptance=noise_tolerance,
+            size_preference='no_preference'
+        )
+
+    def get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
+        """
+        Return up to 30 candidate breeds for an analysis result.
+
+        Explicitly mentioned breeds come first, then breeds suggested by the
+        constraint requirements, then (if fewer than 20 so far) common
+        general-purpose breeds. Suggested breeds are only included when they
+        are in the breed list. The order is deterministic, so truncation
+        never drops the mentioned breeds in favour of suggestions.
+        """
+        known_breeds = set(self.breed_list)
+
+        # Mentioned breeds take priority.
+        candidates = dict.fromkeys(analysis['mentioned_breeds'])
+
+        def add_known(names):
+            for name in names:
+                if name in known_breeds:
+                    candidates.setdefault(name, None)
+
+        # Add breeds according to the constraint requirements.
+        if 'apartment_suitable' in analysis['constraint_requirements']:
+            add_known([
+                'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
+                'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier', 'Shih_Tzu'
+            ])
+
+        if 'child_friendly' in analysis['constraint_requirements']:
+            add_known([
+                'Labrador_Retriever', 'Golden_Retriever', 'Beagle', 'Cavalier_King_Charles_Spaniel',
+                'Bichon_Frise', 'Poodle', 'Cocker_Spaniel'
+            ])
+
+        # Top up with general breeds when there are too few candidates.
+        if len(candidates) < 20:
+            add_known([
+                'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', 'French_Bulldog',
+                'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Boston_Terrier',
+                'Border_Collie', 'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer',
+                'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua'
+            ])
+
+        return list(candidates)[:30]  # cap the candidate count for efficiency