LiamKhoaLe committed on
Commit
4bc06b1
·
1 Parent(s): 7759b7c

Enh search strats

Browse files
search/engines/cooking.py CHANGED
@@ -16,56 +16,133 @@ class CookingSearchEngine:
16
  })
17
  self.timeout = timeout
18
 
19
- # Curated cooking sources
20
  self.cooking_sources = {
21
  'allrecipes': {
22
  'base_url': 'https://www.allrecipes.com',
23
  'search_url': 'https://www.allrecipes.com/search',
24
- 'domains': ['allrecipes.com']
 
 
25
  },
26
  'food_network': {
27
  'base_url': 'https://www.foodnetwork.com',
28
  'search_url': 'https://www.foodnetwork.com/search',
29
- 'domains': ['foodnetwork.com']
 
 
30
  },
31
  'epicurious': {
32
  'base_url': 'https://www.epicurious.com',
33
  'search_url': 'https://www.epicurious.com/search',
34
- 'domains': ['epicurious.com']
 
 
35
  },
36
  'serious_eats': {
37
  'base_url': 'https://www.seriouseats.com',
38
  'search_url': 'https://www.seriouseats.com/search',
39
- 'domains': ['seriouseats.com']
 
 
40
  },
41
  'bon_appetit': {
42
  'base_url': 'https://www.bonappetit.com',
43
  'search_url': 'https://www.bonappetit.com/search',
44
- 'domains': ['bonappetit.com']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  }
46
  }
47
 
48
  def search(self, query: str, num_results: int = 10) -> List[Dict]:
49
- """Search cooking sources for relevant information"""
50
  results = []
51
 
52
- # Strategy 1: Direct cooking source searches
53
- for source_name, source_config in self.cooking_sources.items():
 
 
 
 
 
 
54
  if len(results) >= num_results:
55
  break
56
 
57
- source_results = self._search_cooking_source(query, source_name, source_config)
58
- results.extend(source_results)
59
-
60
- # Add delay between requests
61
- time.sleep(0.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Strategy 2: Cooking fallback sources
 
 
 
 
 
64
  if len(results) < num_results:
65
  fallback_results = self._get_fallback_sources(query, num_results - len(results))
66
  results.extend(fallback_results)
67
 
68
- return results[:num_results]
 
 
69
 
70
  def _search_cooking_source(self, query: str, source_name: str, source_config: Dict) -> List[Dict]:
71
  """Search a specific cooking source"""
@@ -195,3 +272,120 @@ class CookingSearchEngine:
195
  ]
196
 
197
  return fallback_sources[:num_results]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  })
17
  self.timeout = timeout
18
 
19
+ # Comprehensive cooking sources with enhanced search strategies
20
  self.cooking_sources = {
21
  'allrecipes': {
22
  'base_url': 'https://www.allrecipes.com',
23
  'search_url': 'https://www.allrecipes.com/search',
24
+ 'domains': ['allrecipes.com'],
25
+ 'search_params': ['q', 'query', 'search'],
26
+ 'priority': 1
27
  },
28
  'food_network': {
29
  'base_url': 'https://www.foodnetwork.com',
30
  'search_url': 'https://www.foodnetwork.com/search',
31
+ 'domains': ['foodnetwork.com'],
32
+ 'search_params': ['q', 'query', 'search'],
33
+ 'priority': 1
34
  },
35
  'epicurious': {
36
  'base_url': 'https://www.epicurious.com',
37
  'search_url': 'https://www.epicurious.com/search',
38
+ 'domains': ['epicurious.com'],
39
+ 'search_params': ['q', 'query', 'search'],
40
+ 'priority': 1
41
  },
42
  'serious_eats': {
43
  'base_url': 'https://www.seriouseats.com',
44
  'search_url': 'https://www.seriouseats.com/search',
45
+ 'domains': ['seriouseats.com'],
46
+ 'search_params': ['q', 'query', 'search'],
47
+ 'priority': 1
48
  },
49
  'bon_appetit': {
50
  'base_url': 'https://www.bonappetit.com',
51
  'search_url': 'https://www.bonappetit.com/search',
52
+ 'domains': ['bonappetit.com'],
53
+ 'search_params': ['q', 'query', 'search'],
54
+ 'priority': 1
55
+ },
56
+ 'taste_of_home': {
57
+ 'base_url': 'https://www.tasteofhome.com',
58
+ 'search_url': 'https://www.tasteofhome.com/search',
59
+ 'domains': ['tasteofhome.com'],
60
+ 'search_params': ['q', 'query', 'search'],
61
+ 'priority': 2
62
+ },
63
+ 'food_com': {
64
+ 'base_url': 'https://www.food.com',
65
+ 'search_url': 'https://www.food.com/search',
66
+ 'domains': ['food.com'],
67
+ 'search_params': ['q', 'query', 'search'],
68
+ 'priority': 2
69
+ },
70
+ 'bbc_good_food': {
71
+ 'base_url': 'https://www.bbcgoodfood.com',
72
+ 'search_url': 'https://www.bbcgoodfood.com/search',
73
+ 'domains': ['bbcgoodfood.com'],
74
+ 'search_params': ['q', 'query', 'search'],
75
+ 'priority': 2
76
+ },
77
+ 'martha_stewart': {
78
+ 'base_url': 'https://www.marthastewart.com',
79
+ 'search_url': 'https://www.marthastewart.com/search',
80
+ 'domains': ['marthastewart.com'],
81
+ 'search_params': ['q', 'query', 'search'],
82
+ 'priority': 2
83
+ },
84
+ 'king_arthur_baking': {
85
+ 'base_url': 'https://www.kingarthurbaking.com',
86
+ 'search_url': 'https://www.kingarthurbaking.com/search',
87
+ 'domains': ['kingarthurbaking.com'],
88
+ 'search_params': ['q', 'query', 'search'],
89
+ 'priority': 2
90
  }
91
  }
92
 
93
  def search(self, query: str, num_results: int = 10) -> List[Dict]:
94
+ """Search cooking sources for relevant information with enhanced strategies"""
95
  results = []
96
 
97
+ # Enhanced query processing
98
+ enhanced_queries = self._create_enhanced_queries(query)
99
+ logger.info(f"Enhanced queries for cooking search: {enhanced_queries}")
100
+
101
+ # Strategy 1: Priority-based source searches
102
+ priority_sources = self._get_priority_sources()
103
+
104
+ for priority_level in [1, 2]: # Search priority 1 sources first, then priority 2
105
  if len(results) >= num_results:
106
  break
107
 
108
+ for source_name in priority_sources.get(priority_level, []):
109
+ if len(results) >= num_results:
110
+ break
111
+
112
+ source_config = self.cooking_sources[source_name]
113
+
114
+ # Try multiple query variations for each source
115
+ for query_variant in enhanced_queries:
116
+ if len(results) >= num_results:
117
+ break
118
+
119
+ source_results = self._search_cooking_source(query_variant, source_name, source_config)
120
+ if source_results:
121
+ results.extend(source_results)
122
+ logger.info(f"{source_name} found {len(source_results)} results for query: {query_variant}")
123
+ break # Move to next source if we found results
124
+
125
+ # Add delay between requests
126
+ time.sleep(0.3)
127
+
128
+ # Strategy 2: Recipe-specific searches if we need more results
129
+ if len(results) < num_results:
130
+ recipe_results = self._search_recipe_specific(query, num_results - len(results))
131
+ results.extend(recipe_results)
132
 
133
+ # Strategy 3: Technique-specific searches
134
+ if len(results) < num_results:
135
+ technique_results = self._search_technique_specific(query, num_results - len(results))
136
+ results.extend(technique_results)
137
+
138
+ # Strategy 4: Cooking fallback sources
139
  if len(results) < num_results:
140
  fallback_results = self._get_fallback_sources(query, num_results - len(results))
141
  results.extend(fallback_results)
142
 
143
+ # Remove duplicates and return top results
144
+ unique_results = self._remove_duplicates(results)
145
+ return unique_results[:num_results]
146
 
147
  def _search_cooking_source(self, query: str, source_name: str, source_config: Dict) -> List[Dict]:
148
  """Search a specific cooking source"""
 
272
  ]
273
 
274
  return fallback_sources[:num_results]
275
+
276
+ def _create_enhanced_queries(self, query: str) -> List[str]:
277
+ """Create enhanced query variations for better cooking search results"""
278
+ import re
279
+
280
+ # Clean the base query
281
+ base_query = re.sub(r'[^\w\s\-\.]', ' ', query).strip()
282
+ base_query = re.sub(r'\s+', ' ', base_query)
283
+
284
+ enhanced_queries = [base_query]
285
+
286
+ # Add cooking-specific enhancements
287
+ cooking_enhancements = [
288
+ f"{base_query} recipe",
289
+ f"{base_query} cooking method",
290
+ f"{base_query} how to cook",
291
+ f"{base_query} ingredients",
292
+ f"{base_query} technique",
293
+ f"{base_query} tutorial"
294
+ ]
295
+
296
+ # Add technique-specific queries
297
+ cooking_techniques = ['bake', 'roast', 'grill', 'fry', 'boil', 'steam', 'sauté', 'braise', 'poach']
298
+ for technique in cooking_techniques:
299
+ if technique in base_query.lower():
300
+ enhanced_queries.append(f"{base_query} {technique} method")
301
+ enhanced_queries.append(f"how to {technique} {base_query}")
302
+
303
+ # Add cuisine-specific enhancements
304
+ cuisines = ['italian', 'chinese', 'mexican', 'french', 'indian', 'thai', 'japanese', 'mediterranean']
305
+ for cuisine in cuisines:
306
+ if cuisine in base_query.lower():
307
+ enhanced_queries.append(f"{cuisine} {base_query} recipe")
308
+ enhanced_queries.append(f"authentic {cuisine} {base_query}")
309
+
310
+ # Remove duplicates and limit
311
+ unique_queries = list(dict.fromkeys(enhanced_queries))
312
+ return unique_queries[:5] # Limit to 5 query variations
313
+
314
+ def _get_priority_sources(self) -> Dict[int, List[str]]:
315
+ """Get sources organized by priority"""
316
+ priority_sources = {1: [], 2: []}
317
+
318
+ for source_name, config in self.cooking_sources.items():
319
+ priority = config.get('priority', 2)
320
+ priority_sources[priority].append(source_name)
321
+
322
+ return priority_sources
323
+
324
+ def _search_recipe_specific(self, query: str, num_results: int) -> List[Dict]:
325
+ """Search for recipe-specific content"""
326
+ recipe_queries = [
327
+ f"{query} recipe ingredients",
328
+ f"{query} recipe instructions",
329
+ f"{query} recipe steps",
330
+ f"how to make {query}",
331
+ f"{query} cooking recipe"
332
+ ]
333
+
334
+ results = []
335
+ for recipe_query in recipe_queries:
336
+ if len(results) >= num_results:
337
+ break
338
+
339
+ # Search top priority sources for recipe content
340
+ priority_sources = self._get_priority_sources()
341
+ for source_name in priority_sources.get(1, []):
342
+ if len(results) >= num_results:
343
+ break
344
+
345
+ source_config = self.cooking_sources[source_name]
346
+ source_results = self._search_cooking_source(recipe_query, source_name, source_config)
347
+ results.extend(source_results)
348
+ time.sleep(0.2)
349
+
350
+ return results[:num_results]
351
+
352
+ def _search_technique_specific(self, query: str, num_results: int) -> List[Dict]:
353
+ """Search for cooking technique-specific content"""
354
+ technique_queries = [
355
+ f"{query} cooking technique",
356
+ f"{query} cooking method",
357
+ f"how to cook {query}",
358
+ f"{query} preparation method",
359
+ f"{query} cooking tips"
360
+ ]
361
+
362
+ results = []
363
+ for technique_query in technique_queries:
364
+ if len(results) >= num_results:
365
+ break
366
+
367
+ # Search priority sources for technique content
368
+ priority_sources = self._get_priority_sources()
369
+ for source_name in priority_sources.get(1, []):
370
+ if len(results) >= num_results:
371
+ break
372
+
373
+ source_config = self.cooking_sources[source_name]
374
+ source_results = self._search_cooking_source(technique_query, source_name, source_config)
375
+ results.extend(source_results)
376
+ time.sleep(0.2)
377
+
378
+ return results[:num_results]
379
+
380
+ def _remove_duplicates(self, results: List[Dict]) -> List[Dict]:
381
+ """Remove duplicate results based on URL"""
382
+ seen_urls = set()
383
+ unique_results = []
384
+
385
+ for result in results:
386
+ url = result.get('url', '')
387
+ if url and url not in seen_urls:
388
+ seen_urls.add(url)
389
+ unique_results.append(result)
390
+
391
+ return unique_results
search/extractors/content.py CHANGED
@@ -22,15 +22,19 @@ class ContentExtractor:
22
  })
23
  self.timeout = timeout
24
 
25
- # Medical content indicators
26
- self.medical_indicators = [
27
- 'symptom', 'treatment', 'diagnosis', 'medicine', 'medication',
28
- 'therapy', 'condition', 'disease', 'health', 'medical',
29
- 'doctor', 'physician', 'patient', 'clinical', 'study'
 
 
 
 
30
  ]
31
 
32
  def extract(self, url: str, max_length: int = 2000) -> Optional[str]:
33
- """Extract content from a URL with medical focus"""
34
  try:
35
  response = self.session.get(url, timeout=self.timeout)
36
  response.raise_for_status()
@@ -49,11 +53,11 @@ class ContentExtractor:
49
  # Clean and process content
50
  cleaned_content = self._clean_content(content)
51
 
52
- # Focus on medical content if possible
53
- medical_content = self._extract_medical_content(cleaned_content)
54
 
55
  # Truncate to max length
56
- final_content = self._truncate_content(medical_content or cleaned_content, max_length)
57
 
58
  return final_content if final_content else None
59
 
@@ -145,29 +149,29 @@ class ContentExtractor:
145
 
146
  return content.strip()
147
 
148
- def _extract_medical_content(self, content: str) -> Optional[str]:
149
- """Extract medical-focused content from the text"""
150
  if not content:
151
  return None
152
 
153
  # Split content into sentences
154
  sentences = re.split(r'[.!?]+', content)
155
- medical_sentences = []
156
 
157
  for sentence in sentences:
158
  sentence = sentence.strip()
159
  if len(sentence) < 20: # Skip very short sentences
160
  continue
161
 
162
- # Check if sentence contains medical indicators
163
  sentence_lower = sentence.lower()
164
- if any(indicator in sentence_lower for indicator in self.medical_indicators):
165
- medical_sentences.append(sentence)
166
 
167
- if medical_sentences:
168
- # Return medical sentences, prioritizing longer ones
169
- medical_sentences.sort(key=len, reverse=True)
170
- return '. '.join(medical_sentences[:10]) + '.'
171
 
172
  return None
173
 
 
22
  })
23
  self.timeout = timeout
24
 
25
+ # Cooking content indicators
26
+ self.cooking_indicators = [
27
+ 'recipe', 'ingredients', 'instructions', 'cooking', 'baking', 'roasting',
28
+ 'grilling', 'frying', 'boiling', 'steaming', 'sautéing', 'braising',
29
+ 'seasoning', 'spices', 'herbs', 'sauce', 'marinade', 'dressing',
30
+ 'temperature', 'timing', 'preparation', 'technique', 'method',
31
+ 'oven', 'stovetop', 'grill', 'pan', 'pot', 'skillet', 'knife',
32
+ 'cutting', 'chopping', 'dicing', 'slicing', 'mixing', 'stirring',
33
+ 'servings', 'cook time', 'prep time', 'total time', 'difficulty'
34
  ]
35
 
36
  def extract(self, url: str, max_length: int = 2000) -> Optional[str]:
37
+ """Extract content from a URL with cooking focus"""
38
  try:
39
  response = self.session.get(url, timeout=self.timeout)
40
  response.raise_for_status()
 
53
  # Clean and process content
54
  cleaned_content = self._clean_content(content)
55
 
56
+ # Focus on cooking content if possible
57
+ cooking_content = self._extract_cooking_content(cleaned_content)
58
 
59
  # Truncate to max length
60
+ final_content = self._truncate_content(cooking_content or cleaned_content, max_length)
61
 
62
  return final_content if final_content else None
63
 
 
149
 
150
  return content.strip()
151
 
152
+ def _extract_cooking_content(self, content: str) -> Optional[str]:
153
+ """Extract cooking-focused content from the text"""
154
  if not content:
155
  return None
156
 
157
  # Split content into sentences
158
  sentences = re.split(r'[.!?]+', content)
159
+ cooking_sentences = []
160
 
161
  for sentence in sentences:
162
  sentence = sentence.strip()
163
  if len(sentence) < 20: # Skip very short sentences
164
  continue
165
 
166
+ # Check if sentence contains cooking indicators
167
  sentence_lower = sentence.lower()
168
+ if any(indicator in sentence_lower for indicator in self.cooking_indicators):
169
+ cooking_sentences.append(sentence)
170
 
171
+ if cooking_sentences:
172
+ # Return cooking sentences, prioritizing longer ones
173
+ cooking_sentences.sort(key=len, reverse=True)
174
+ return '. '.join(cooking_sentences[:15]) + '.' # More sentences for cooking content
175
 
176
  return None
177
 
search/processors/cooking.py CHANGED
@@ -9,17 +9,47 @@ class CookingSearchProcessor:
9
  """Process and enhance cooking search results"""
10
 
11
  def __init__(self):
12
- self.cooking_keywords = [
13
- 'recipe', 'cooking', 'baking', 'roasting', 'grilling', 'frying', 'boiling', 'steaming',
14
- 'ingredients', 'seasoning', 'spices', 'herbs', 'sauce', 'marinade', 'dressing',
15
- 'technique', 'method', 'temperature', 'timing', 'preparation', 'cooking time',
16
- 'oven', 'stovetop', 'grill', 'pan', 'pot', 'skillet', 'knife', 'cutting',
17
- 'vegetarian', 'vegan', 'gluten-free', 'dairy-free', 'keto', 'paleo', 'diet',
18
- 'appetizer', 'main course', 'dessert', 'breakfast', 'lunch', 'dinner',
19
- 'cuisine', 'italian', 'chinese', 'mexican', 'french', 'indian', 'thai',
20
- 'substitution', 'alternative', 'variation', 'modification', 'adaptation',
21
- 'troubleshooting', 'tips', 'tricks', 'hacks', 'mistakes', 'common errors'
22
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def process_results(self, results: List[Dict], user_query: str) -> Tuple[str, Dict[int, str]]:
25
  """Process search results and create comprehensive cooking summary"""
@@ -62,37 +92,67 @@ class CookingSearchProcessor:
62
  return relevant_results[:10]
63
 
64
  def _calculate_relevance_score(self, result: Dict, user_query: str) -> float:
65
- """Calculate cooking relevance score for a result"""
66
  score = 0.0
67
 
68
- # Check title relevance
69
  title = result.get('title', '').lower()
 
70
  query_lower = user_query.lower()
71
 
72
- # Direct query match in title
73
- if any(word in title for word in query_lower.split()):
74
- score += 0.4
75
-
76
- # Cooking keyword match in title
77
- cooking_matches = sum(1 for keyword in self.cooking_keywords if keyword in title)
78
- score += min(cooking_matches * 0.1, 0.3)
79
-
80
- # Domain credibility for cooking sources
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  url = result.get('url', '').lower()
82
  credible_domains = [
83
  'allrecipes.com', 'foodnetwork.com', 'epicurious.com', 'seriouseats.com',
84
  'bonappetit.com', 'cooking.nytimes.com', 'tasteofhome.com', 'food.com',
85
  'bbcgoodfood.com', 'jamieoliver.com', 'gordonramsay.com', 'marthastewart.com',
86
- 'kingarthurbaking.com', 'sallysbakingaddiction.com', 'smittenkitchen.com'
 
 
87
  ]
88
 
89
  if any(domain in url for domain in credible_domains):
90
- score += 0.3
91
 
92
  # Source type bonus for cooking
93
  source = result.get('source', '')
94
  if 'cooking' in source or 'recipe' in source or any(domain in source for domain in credible_domains):
95
- score += 0.2
 
 
 
 
 
 
 
 
96
 
97
  return min(score, 1.0)
98
 
@@ -174,11 +234,13 @@ class CookingSearchProcessor:
174
  return combined_summary
175
 
176
  def _group_by_topic(self, results: List[Dict]) -> Dict[str, List[Dict]]:
177
- """Group results by cooking topic"""
178
  topics = {
179
  'recipes': [],
180
  'techniques': [],
181
  'ingredients': [],
 
 
182
  'general': []
183
  }
184
 
@@ -187,13 +249,17 @@ class CookingSearchProcessor:
187
  summary_lower = result.get('summary', '').lower()
188
  content_lower = f"{title_lower} {summary_lower}"
189
 
190
- # Categorize by content
191
- if any(word in content_lower for word in ['recipe', 'ingredients', 'instructions', 'steps']):
192
  topics['recipes'].append(result)
193
- elif any(word in content_lower for word in ['technique', 'method', 'how to', 'cooking']):
194
  topics['techniques'].append(result)
195
- elif any(word in content_lower for word in ['ingredients', 'substitution', 'alternative', 'variation']):
196
  topics['ingredients'].append(result)
 
 
 
 
197
  else:
198
  topics['general'].append(result)
199
 
@@ -206,10 +272,12 @@ class CookingSearchProcessor:
206
 
207
  # Add topic header
208
  topic_headers = {
209
- 'recipes': "**Recipes and Instructions:**",
210
- 'techniques': "**Cooking Techniques:**",
211
- 'ingredients': "**Ingredients and Substitutions:**",
212
- 'general': "**General Information:**"
 
 
213
  }
214
 
215
  header = topic_headers.get(topic, "**Information:**")
 
9
  """Process and enhance cooking search results"""
10
 
11
  def __init__(self):
12
+ # Enhanced cooking keywords with categories
13
+ self.cooking_keywords = {
14
+ 'primary': [
15
+ 'recipe', 'cooking', 'baking', 'roasting', 'grilling', 'frying', 'boiling', 'steaming',
16
+ 'sautéing', 'braising', 'poaching', 'broiling', 'searing', 'simmering'
17
+ ],
18
+ 'ingredients': [
19
+ 'ingredients', 'seasoning', 'spices', 'herbs', 'sauce', 'marinade', 'dressing',
20
+ 'oil', 'butter', 'flour', 'sugar', 'salt', 'pepper', 'garlic', 'onion',
21
+ 'vegetables', 'meat', 'chicken', 'beef', 'pork', 'fish', 'seafood'
22
+ ],
23
+ 'techniques': [
24
+ 'technique', 'method', 'temperature', 'timing', 'preparation', 'cooking time',
25
+ 'prep time', 'total time', 'servings', 'difficulty', 'skill level'
26
+ ],
27
+ 'equipment': [
28
+ 'oven', 'stovetop', 'grill', 'pan', 'pot', 'skillet', 'knife', 'cutting',
29
+ 'mixing', 'stirring', 'chopping', 'dicing', 'slicing', 'whisking'
30
+ ],
31
+ 'dietary': [
32
+ 'vegetarian', 'vegan', 'gluten-free', 'dairy-free', 'keto', 'paleo', 'diet',
33
+ 'healthy', 'low-carb', 'low-fat', 'protein', 'fiber'
34
+ ],
35
+ 'meal_types': [
36
+ 'appetizer', 'main course', 'dessert', 'breakfast', 'lunch', 'dinner',
37
+ 'snack', 'side dish', 'soup', 'salad', 'pasta', 'pizza'
38
+ ],
39
+ 'cuisines': [
40
+ 'italian', 'chinese', 'mexican', 'french', 'indian', 'thai', 'japanese',
41
+ 'mediterranean', 'american', 'asian', 'european', 'fusion'
42
+ ],
43
+ 'modifications': [
44
+ 'substitution', 'alternative', 'variation', 'modification', 'adaptation',
45
+ 'troubleshooting', 'tips', 'tricks', 'hacks', 'mistakes', 'common errors'
46
+ ]
47
+ }
48
+
49
+ # Flatten all keywords for easy lookup
50
+ self.all_cooking_keywords = []
51
+ for category, keywords in self.cooking_keywords.items():
52
+ self.all_cooking_keywords.extend(keywords)
53
 
54
  def process_results(self, results: List[Dict], user_query: str) -> Tuple[str, Dict[int, str]]:
55
  """Process search results and create comprehensive cooking summary"""
 
92
  return relevant_results[:10]
93
 
94
  def _calculate_relevance_score(self, result: Dict, user_query: str) -> float:
95
+ """Calculate enhanced cooking relevance score for a result"""
96
  score = 0.0
97
 
98
+ # Check title and content relevance
99
  title = result.get('title', '').lower()
100
+ content = result.get('content', '').lower()
101
  query_lower = user_query.lower()
102
 
103
+ # Direct query match in title (highest priority)
104
+ query_words = query_lower.split()
105
+ title_matches = sum(1 for word in query_words if word in title)
106
+ if title_matches > 0:
107
+ score += min(title_matches * 0.15, 0.4)
108
+
109
+ # Direct query match in content
110
+ content_matches = sum(1 for word in query_words if word in content)
111
+ if content_matches > 0:
112
+ score += min(content_matches * 0.05, 0.2)
113
+
114
+ # Enhanced cooking keyword scoring by category
115
+ for category, keywords in self.cooking_keywords.items():
116
+ category_matches = sum(1 for keyword in keywords if keyword in title)
117
+ if category_matches > 0:
118
+ # Different weights for different categories
119
+ if category == 'primary':
120
+ score += min(category_matches * 0.08, 0.25)
121
+ elif category == 'ingredients':
122
+ score += min(category_matches * 0.06, 0.2)
123
+ elif category == 'techniques':
124
+ score += min(category_matches * 0.07, 0.2)
125
+ elif category == 'cuisines':
126
+ score += min(category_matches * 0.05, 0.15)
127
+ else:
128
+ score += min(category_matches * 0.04, 0.1)
129
+
130
+ # Domain credibility for cooking sources (enhanced list)
131
  url = result.get('url', '').lower()
132
  credible_domains = [
133
  'allrecipes.com', 'foodnetwork.com', 'epicurious.com', 'seriouseats.com',
134
  'bonappetit.com', 'cooking.nytimes.com', 'tasteofhome.com', 'food.com',
135
  'bbcgoodfood.com', 'jamieoliver.com', 'gordonramsay.com', 'marthastewart.com',
136
+ 'kingarthurbaking.com', 'sallysbakingaddiction.com', 'smittenkitchen.com',
137
+ 'food52.com', 'cookinglight.com', 'eatingwell.com', 'delish.com',
138
+ 'tasty.co', 'buzzfeed.com/food', 'foodandwine.com', 'saveur.com'
139
  ]
140
 
141
  if any(domain in url for domain in credible_domains):
142
+ score += 0.25
143
 
144
  # Source type bonus for cooking
145
  source = result.get('source', '')
146
  if 'cooking' in source or 'recipe' in source or any(domain in source for domain in credible_domains):
147
+ score += 0.15
148
+
149
+ # Recipe-specific content bonus
150
+ if any(word in title for word in ['recipe', 'how to', 'tutorial', 'guide']):
151
+ score += 0.1
152
+
153
+ # URL path analysis for cooking content
154
+ if any(path in url for path in ['/recipe/', '/recipes/', '/cooking/', '/food/']):
155
+ score += 0.1
156
 
157
  return min(score, 1.0)
158
 
 
234
  return combined_summary
235
 
236
  def _group_by_topic(self, results: List[Dict]) -> Dict[str, List[Dict]]:
237
+ """Group results by enhanced cooking topics"""
238
  topics = {
239
  'recipes': [],
240
  'techniques': [],
241
  'ingredients': [],
242
+ 'equipment': [],
243
+ 'tips_tricks': [],
244
  'general': []
245
  }
246
 
 
249
  summary_lower = result.get('summary', '').lower()
250
  content_lower = f"{title_lower} {summary_lower}"
251
 
252
+ # Enhanced categorization by content
253
+ if any(word in content_lower for word in ['recipe', 'ingredients', 'instructions', 'steps', 'how to make']):
254
  topics['recipes'].append(result)
255
+ elif any(word in content_lower for word in ['technique', 'method', 'how to cook', 'cooking method', 'preparation']):
256
  topics['techniques'].append(result)
257
+ elif any(word in content_lower for word in ['ingredients', 'substitution', 'alternative', 'variation', 'seasoning', 'spices']):
258
  topics['ingredients'].append(result)
259
+ elif any(word in content_lower for word in ['equipment', 'tools', 'knife', 'pan', 'pot', 'oven', 'grill']):
260
+ topics['equipment'].append(result)
261
+ elif any(word in content_lower for word in ['tips', 'tricks', 'hacks', 'mistakes', 'troubleshooting', 'advice']):
262
+ topics['tips_tricks'].append(result)
263
  else:
264
  topics['general'].append(result)
265
 
 
272
 
273
  # Add topic header
274
  topic_headers = {
275
+ 'recipes': "**🍳 Recipes and Instructions:**",
276
+ 'techniques': "**👨‍🍳 Cooking Techniques:**",
277
+ 'ingredients': "**🥘 Ingredients and Substitutions:**",
278
+ 'equipment': "**🔪 Equipment and Tools:**",
279
+ 'tips_tricks': "**💡 Tips and Tricks:**",
280
+ 'general': "**📚 General Information:**"
281
  }
282
 
283
  header = topic_headers.get(topic, "**Information:**")