# Page-header residue captured during extraction: "Spaces: Running on Zero"
import hashlib
import random
import re
import sqlite3
import traceback
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional, Any

import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info
from config_manager import get_config_manager, get_standardized_breed_data
from constraint_manager import ConstraintManager, apply_breed_constraints
from dog_database import get_dog_description
from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore
from query_understanding import QueryUnderstandingEngine, analyze_user_query
from score_calibrator import ScoreCalibrator, calibrate_breed_scores
from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores
class UserQueryAnalyzer:
    """Keyword-based analyzer for free-text user queries about dog breeds.

    Covers user-input analysis, lifestyle keyword extraction and preference
    parsing. All matching is done on lower-cased text. A search term only
    matches where it starts a word (it may be followed by a suffix such as a
    plural "s"), so "like" is not found inside "dislike" and "old" is not
    found inside "golden".
    """

    # Score given to a mentioned breed when no preference keyword is nearby.
    _DEFAULT_PREFERENCE_SCORE = 0.5
    # Number of characters inspected on each side of the breed mention start.
    _CONTEXT_WINDOW = 50
    # Generic breeds are appended while fewer than this many candidates exist.
    _MIN_CANDIDATES_BEFORE_FALLBACK = 20
    # Hard cap on the number of candidates returned.
    _MAX_CANDIDATES = 30

    # category -> ((label, trigger terms), ...). Category order is the key
    # order of the dict returned by extract_lifestyle_keywords(); label order
    # is the order in which labels are appended within a category.
    _LIFESTYLE_TERMS = (
        ('living_space', (
            ('apartment', ('apartment', 'flat', 'condo', 'small space', 'city living', 'urban', 'no yard', 'indoor')),
            ('house', ('house', 'yard', 'garden', 'backyard', 'large space', 'suburban', 'rural', 'farm')),
        )),
        ('activity_level', (
            ('high', ('active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor', 'sports', 'jogging',
                      'athletic', 'adventure', 'vigorous', 'high energy', 'workout')),
            ('low', ('calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed',
                     'peaceful', 'quiet lifestyle', 'minimal exercise')),
            ('moderate', ('moderate', 'walk', 'daily walks', 'light exercise')),
        )),
        ('family_situation', (
            ('children', ('children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage', 'school age')),
            ('elderly', ('elderly', 'senior', 'old', 'retirement', 'aged', 'mature')),
            ('single', ('single', 'alone', 'individual', 'solo', 'myself')),
        )),
        ('noise_preference', (
            ('low', ('quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise',
                     'soft-spoken', 'calm', 'tranquil')),
            ('high', ('loud', 'barking ok', 'noise tolerant', 'vocal', 'doesn\'t matter')),
        )),
        ('size_preference', (
            ('small', ('small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog')),
            ('large', ('large', 'big', 'giant', 'huge', 'massive', 'great')),
            ('medium', ('medium', 'moderate size', 'average', 'mid-sized')),
        )),
        ('care_level', (
            ('low', ('low maintenance', 'easy care', 'simple', 'minimal grooming', 'wash and go')),
            ('high', ('high maintenance', 'grooming', 'care intensive', 'professional grooming', 'daily brushing')),
        )),
        ('special_needs', (
            ('guard', ('guard', 'protection', 'security', 'watchdog', 'protective', 'defender')),
            ('companion', ('therapy', 'emotional support', 'companion', 'comfort', 'lap dog', 'cuddly')),
            ('hypoallergenic', ('hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly', 'no shed')),
            ('multi_pet', ('good with cats', 'cat friendly', 'multi-pet', 'other animals')),
        )),
        ('intelligence_preference', (
            ('high', ('smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train',
                      'trainable', 'genius', 'brilliant')),
            ('independent', ('independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves')),
        )),
        ('grooming_preference', (
            ('low', ('low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat')),
            ('high', ('high grooming', 'professional grooming', 'lots of care', 'high maintenance coat',
                      'daily brushing', 'regular grooming')),
        )),
        ('lifespan_preference', (
            ('long', ('long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity')),
            ('healthy', ('healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution')),
        )),
        ('temperament_preference', (
            ('gentle', ('gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile')),
            ('playful', ('playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy')),
            ('protective', ('protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive')),
            ('friendly', ('friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious')),
        )),
        ('experience_level', (
            ('beginner', ('first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced')),
            ('advanced', ('experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned')),
        )),
    )

    # Two-word phrase shapes worth keeping together for semantic search.
    _PHRASE_PATTERNS = tuple(re.compile(pattern) for pattern in (
        r'good with \w+',
        r'apartment \w+',
        r'family \w+',
        r'exercise \w+',
        r'grooming \w+',
        r'noise \w+',
        r'training \w+',
        r'health \w+',
        r'\w+ friendly',
        r'\w+ tolerant',
        r'\w+ maintenance',
        r'\w+ energy',
        r'\w+ barking',
        r'\w+ shedding',
    ))

    _POSITIVE_TERMS = (
        'love', 'like', 'prefer', 'enjoy', 'want', 'need', 'looking for',
        'good', 'great', 'excellent', 'perfect', 'wonderful', 'amazing',
    )
    _NEGATIVE_TERMS = (
        'hate', 'dislike', 'avoid', 'don\'t want', 'no', 'not',
        'bad', 'terrible', 'awful', 'horrible', 'worst', 'never',
    )

    # Nouns that show a size adjective describes the dog / the home.
    _DOG_NOUNS = ('dog', 'breed', 'pupp')
    _HOME_NOUNS = ('house', 'home', 'apartment', 'flat', 'space', 'yard')

    _APARTMENT_BREEDS = (
        'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
        'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier', 'Shih_Tzu',
    )
    _CHILD_FRIENDLY_BREEDS = (
        'Labrador_Retriever', 'Golden_Retriever', 'Beagle', 'Cavalier_King_Charles_Spaniel',
        'Bichon_Frise', 'Poodle', 'Cocker_Spaniel',
    )
    _GENERAL_BREEDS = (
        'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', 'French_Bulldog',
        'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Boston_Terrier',
        'Border_Collie', 'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer',
        'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua',
    )

    def __init__(self, breed_list: List[str]):
        """Store the known breeds and build the keyword tables.

        Args:
            breed_list: Breed identifiers; underscores are treated as spaces
                when the identifiers are matched against user text.
        """
        self.breed_list = breed_list
        # Preference keyword -> score assigned to a breed mentioned near it.
        self.comparative_keywords = {
            'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8,
            'then': 0.7, 'second': 0.7, 'followed': 0.6,
            'third': 0.5, 'least': 0.3, 'dislike': 0.2
        }
        # Words dropped by generate_search_keywords().
        self.stop_words = {
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
            'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below',
            'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
            'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'i', 'me', 'my', 'myself',
            'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he',
            'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they',
            'them', 'their', 'theirs', 'themselves'
        }

    # ------------------------------------------------------------------
    # Matching helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _term_pattern(term: str) -> 're.Pattern':
        """Compile *term* so it only matches at the start of an ASCII word.

        The look-behind is skipped for terms that do not start with an ASCII
        letter or digit (e.g. the Chinese terms), because such text has no
        spaces between words and a boundary requirement would never match.
        """
        first = term[:1]
        guard = r'(?<![a-z0-9])' if first.isascii() and first.isalnum() else ''
        return re.compile(guard + re.escape(term))

    @classmethod
    def _find_term(cls, text: str, term: str, start: int = 0, end: Optional[int] = None) -> int:
        """Return the index of *term* within ``text[start:end]``, or -1.

        ``start``/``end`` are passed to ``Pattern.search`` rather than slicing
        so that the look-behind still sees the characters before ``start``.
        """
        if end is None:
            end = len(text)
        match = cls._term_pattern(term).search(text, start, end)
        return match.start() if match else -1

    @classmethod
    def _has_any_term(cls, text: str, terms) -> bool:
        """Return True when at least one of *terms* occurs in *text*."""
        return any(cls._find_term(text, term) != -1 for term in terms)

    def _find_breed_mention(self, text: str, breed: str) -> int:
        """Return where *breed* is mentioned in lower-cased *text*, or -1.

        The full display name is preferred; otherwise any single word of the
        name counts as a mention (deliberately loose, so "retriever" matches
        every retriever breed) and the position of the first such word, in
        name order, is returned.
        """
        display = breed.replace('_', ' ').lower()
        words = display.split()
        if not words:
            return -1
        for candidate in [display] + words:
            position = self._find_term(text, candidate)
            if position != -1:
                return position
        return -1

    @staticmethod
    def _adjective_applies(text: str, adjective: str, excluded_nouns) -> bool:
        """Return True if *adjective* occurs other than directly before one of
        *excluded_nouns* (optionally via "-sized"), e.g. "small" that is not
        part of "small dog"."""
        pattern = (
            r'(?<![a-z0-9])' + re.escape(adjective)
            + r'(?![\s-]*(?:sized?[\s-]+)?(?:' + '|'.join(excluded_nouns) + r'))'
        )
        return re.search(pattern, text) is not None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
        """Parse comparative preference expressions ("love X most, then Y").

        Args:
            user_input: Raw user text.

        Returns:
            Mapping of each mentioned breed to the highest score among the
            preference keywords found within ``_CONTEXT_WINDOW`` characters
            of the mention, or ``_DEFAULT_PREFERENCE_SCORE`` when no keyword
            is nearby. Low-scoring keywords ("least", "dislike") take effect
            when they are the only keywords in range.
        """
        text = user_input.lower()
        breed_scores = {}
        for breed in self.breed_list:
            position = self._find_breed_mention(text, breed)
            if position == -1:
                continue
            window_start = max(0, position - self._CONTEXT_WINDOW)
            window_end = min(len(text), position + self._CONTEXT_WINDOW)
            nearby_scores = [
                score
                for keyword, score in self.comparative_keywords.items()
                if self._find_term(text, keyword, window_start, window_end) != -1
            ]
            breed_scores[breed] = max(nearby_scores, default=self._DEFAULT_PREFERENCE_SCORE)
        return breed_scores

    def extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
        """Extract lifestyle labels per category from the user text.

        Args:
            user_input: Raw user text.

        Returns:
            Dict with one list per category in ``_LIFESTYLE_TERMS`` (always
            all categories, possibly empty lists). A label is included when
            any of its trigger terms occurs in the text; several labels of
            the same category may be present at once. Negation is not
            interpreted.
        """
        text = user_input.lower()
        return {
            category: [label for label, terms in groups if self._has_any_term(text, terms)]
            for category, groups in self._LIFESTYLE_TERMS
        }

    def preprocess_text(self, text: str) -> str:
        """Lower-case *text*, replace every character other than word
        characters, whitespace, hyphens and apostrophes with a space, and
        collapse runs of whitespace."""
        text = text.lower()
        text = re.sub(r'[^\w\s\-\']', ' ', text)
        return ' '.join(text.split())

    def generate_search_keywords(self, text: str) -> List[str]:
        """Generate keywords for semantic search.

        Args:
            text: Input text.

        Returns:
            Unique keywords in first-seen order: words longer than two
            characters that are not stop words, followed by the phrases from
            ``_extract_phrases``. Returns ``[]`` if extraction fails.
        """
        text = self.preprocess_text(text)
        try:
            keywords = [
                word for word in text.split()
                if len(word) > 2 and word not in self.stop_words
            ]
            keywords.extend(self._extract_phrases(text))
            # dict.fromkeys de-duplicates while keeping a deterministic order
            # (a set would reorder the keywords between runs).
            return list(dict.fromkeys(keywords))
        except Exception as e:
            # Best-effort by design: keyword generation must not break the caller.
            print(f"Error generating search keywords: {str(e)}")
            return []

    def _extract_phrases(self, text: str) -> List[str]:
        """Extract important two-word phrases.

        Args:
            text: Input text (expected to be preprocessed already).

        Returns:
            All matches of every pattern in ``_PHRASE_PATTERNS``, grouped by
            pattern in declaration order; duplicates are kept.
        """
        phrases = []
        for pattern in self._PHRASE_PATTERNS:
            phrases.extend(pattern.findall(text))
        return phrases

    def analyze_sentiment(self, text: str) -> Dict[str, float]:
        """Estimate sentiment with a simple lexicon count.

        Punctuation is ignored when tokenising. Multi-word entries
        ("looking for", "don't want") are matched as phrases and consumed
        before single words are counted, so "don't want" does not also count
        "want" as positive.

        Args:
            text: Input text.

        Returns:
            Dict with 'positive', 'negative' and 'neutral' shares of the
            token count. Text without any tokens yields
            ``{'positive': 0.5, 'negative': 0.5, 'neutral': 0.0}``.
        """
        # Normalise the typographic apostrophe so "don’t" equals "don't".
        tokens = re.findall(r"\w+(?:'\w+)*", text.lower().replace('\u2019', "'"))
        total_words = len(tokens)
        if total_words == 0:
            return {'positive': 0.5, 'negative': 0.5, 'neutral': 0.0}

        remaining = ' '.join(tokens)
        phrase_hits = {}
        for label, terms in (('negative', self._NEGATIVE_TERMS), ('positive', self._POSITIVE_TERMS)):
            hits = 0
            for phrase in terms:
                if ' ' not in phrase:
                    continue
                remaining, found = re.subn(
                    r'(?<!\S)' + re.escape(phrase) + r'(?!\S)', ' ', remaining
                )
                hits += found
            phrase_hits[label] = hits

        leftover = remaining.split()
        positive_count = phrase_hits['positive'] + sum(1 for word in leftover if word in self._POSITIVE_TERMS)
        negative_count = phrase_hits['negative'] + sum(1 for word in leftover if word in self._NEGATIVE_TERMS)

        positive_score = positive_count / total_words
        negative_score = negative_count / total_words
        neutral_score = max(0, 1 - positive_score - negative_score)
        return {
            'positive': positive_score,
            'negative': negative_score,
            'neutral': neutral_score
        }

    def parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
        """Parse coarse user requirements from the text.

        Size adjectives that directly describe the dog ("small dog") are not
        taken as a statement about the home, and "medium house" is not taken
        as a dog-size preference.

        Args:
            user_input: Raw user text.

        Returns:
            Dict with keys 'living_space', 'exercise_level', 'preferred_size'
            and 'noise_tolerance'; each value is a label string or ``None``
            when nothing was detected.
        """
        requirements = {
            'living_space': None,
            'exercise_level': None,
            'preferred_size': None,
            'noise_tolerance': None
        }
        input_lower = user_input.lower()

        # Living space
        if 'apartment' in input_lower or self._adjective_applies(input_lower, 'small', self._DOG_NOUNS):
            requirements['living_space'] = 'apartment'
        elif 'large house' in input_lower or self._adjective_applies(input_lower, 'big', self._DOG_NOUNS):
            requirements['living_space'] = 'large_house'
        elif self._adjective_applies(input_lower, 'medium', self._DOG_NOUNS):
            requirements['living_space'] = 'medium_house'

        # Exercise level
        if "don't exercise" in input_lower or 'low exercise' in input_lower:
            requirements['exercise_level'] = 'low'
        elif self._has_any_term(input_lower, ('hiking', 'running', 'active')):
            requirements['exercise_level'] = 'high'
        elif '30 minutes' in input_lower or 'moderate' in input_lower:
            requirements['exercise_level'] = 'moderate'

        # Preferred dog size
        if self._has_any_term(input_lower, ('small dog', 'tiny', 'toy')):
            requirements['preferred_size'] = 'small'
        elif self._has_any_term(input_lower, ('large dog', 'big dog')):
            requirements['preferred_size'] = 'large'
        elif self._adjective_applies(input_lower, 'medium', self._HOME_NOUNS):
            requirements['preferred_size'] = 'medium'

        # Noise tolerance: only unambiguous phrases are used, so a sentence
        # such as "not a loud dog" does not flip the result to 'high'.
        if self._has_any_term(input_lower, ('quiet', 'silent', 'no barking')):
            requirements['noise_tolerance'] = 'low'
        elif self._has_any_term(input_lower, ('barking ok', 'noise tolerant')):
            requirements['noise_tolerance'] = 'high'

        return requirements

    def analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
        """Analyze a user description into breeds, constraints and context.

        Args:
            user_description: Raw user text (English or Chinese keywords).

        Returns:
            Dict with:
              - 'mentioned_breeds': breeds from ``breed_list`` found in the text.
              - 'lifestyle_keywords': always an empty dict (not filled here).
              - 'preference_strength': 0.8 for every mentioned breed if the
                text contains a liking word, otherwise 0.5.
              - 'constraint_requirements': subset of 'low_noise',
                'apartment_suitable', 'child_friendly'.
              - 'user_context': has_children, living_space, activity_level,
                noise_sensitive, experience_level.
        """
        text = user_description.lower()

        # The liking check looks at the whole text, so it is the same for
        # every breed and is computed once.
        expresses_liking = self._has_any_term(text, ('love', 'prefer', 'like', '喜歡', '最愛'))
        strength = 0.8 if expresses_liking else 0.5
        mentioned = [
            breed for breed in self.breed_list
            if self._find_breed_mention(text, breed) != -1
        ]

        analysis = {
            'mentioned_breeds': mentioned,
            'lifestyle_keywords': {},
            'preference_strength': {breed: strength for breed in mentioned},
            'constraint_requirements': [],
            'user_context': {}
        }

        # Constraint requirements
        if self._has_any_term(text, ('quiet', 'silent', 'no barking', '安靜')):
            analysis['constraint_requirements'].append('low_noise')
        if self._has_any_term(text, ('apartment', 'small space', '公寓')):
            analysis['constraint_requirements'].append('apartment_suitable')
        if self._has_any_term(text, ('children', 'kids', 'family', '小孩')):
            analysis['constraint_requirements'].append('child_friendly')

        # User context
        analysis['user_context'] = {
            'has_children': self._has_any_term(text, ('children', 'kids', '小孩')),
            'living_space': 'apartment' if self._has_any_term(text, ('apartment', '公寓')) else 'house',
            'activity_level': 'high' if self._has_any_term(text, ('active', 'energetic', '活躍')) else 'moderate',
            'noise_sensitive': self._has_any_term(text, ('quiet', 'silent', '安靜')),
            'experience_level': 'beginner' if self._has_any_term(text, ('first time', 'beginner', '新手')) else 'intermediate'
        }
        return analysis

    def create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> 'UserPreferences':
        """Build a ``UserPreferences`` object from an analysis result.

        Args:
            analysis: Dict containing a 'user_context' mapping, as produced
                by ``analyze_user_description_enhanced``.

        Returns:
            ``UserPreferences`` populated from the context; fields the
            context does not cover get fixed defaults.

        Raises:
            KeyError: If *analysis* has no 'user_context' entry.
        """
        context = analysis['user_context']

        # Anything that is not an apartment is treated as a small house.
        living_space = 'apartment' if context.get('living_space') == 'apartment' else 'house_small'
        yard_access = 'no_yard' if living_space == 'apartment' else 'shared_yard'

        # Exercise time and type follow the activity level.
        activity_level = context.get('activity_level', 'moderate')
        exercise_time_map = {'high': 120, 'moderate': 60, 'low': 30}
        exercise_time = exercise_time_map.get(activity_level, 60)
        exercise_type_map = {'high': 'active_training', 'moderate': 'moderate_activity', 'low': 'light_walks'}
        exercise_type = exercise_type_map.get(activity_level, 'moderate_activity')

        noise_tolerance = 'low' if context.get('noise_sensitive', False) else 'medium'
        has_children = context.get('has_children', False)

        return UserPreferences(
            living_space=living_space,
            yard_access=yard_access,
            exercise_time=exercise_time,
            exercise_type=exercise_type,
            grooming_commitment='medium',
            experience_level=context.get('experience_level', 'intermediate'),
            time_availability='moderate',
            has_children=has_children,
            children_age='school_age' if has_children else None,
            noise_tolerance=noise_tolerance,
            space_for_play=(living_space != 'apartment'),
            other_pets=False,
            climate='moderate',
            health_sensitivity='medium',
            barking_acceptance=noise_tolerance,
            size_preference='no_preference'
        )

    def get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
        """Build the candidate breed list for scoring.

        Args:
            analysis: Dict with 'mentioned_breeds' and
                'constraint_requirements', as produced by
                ``analyze_user_description_enhanced``.

        Returns:
            At most ``_MAX_CANDIDATES`` unique breeds in priority order:
            explicitly mentioned breeds first, then breeds matching the
            apartment / child-friendly constraints, then generic breeds if
            fewer than ``_MIN_CANDIDATES_BEFORE_FALLBACK`` were collected.
            Curated breeds are only included when present in ``breed_list``.
        """
        known_breeds = set(self.breed_list)
        # A dict keeps insertion order, so truncation below drops the lowest
        # priority entries instead of arbitrary ones.
        candidates = dict.fromkeys(analysis['mentioned_breeds'])

        def add_known(breeds):
            for breed in breeds:
                if breed in known_breeds:
                    candidates.setdefault(breed)

        constraints = analysis['constraint_requirements']
        if 'apartment_suitable' in constraints:
            add_known(self._APARTMENT_BREEDS)
        if 'child_friendly' in constraints:
            add_known(self._CHILD_FRIENDLY_BREEDS)

        if len(candidates) < self._MIN_CANDIDATES_BEFORE_FALLBACK:
            add_known(self._GENERAL_BREEDS)

        return list(candidates)[:self._MAX_CANDIDATES]