LiamKhoaLe committed on
Commit
b95b076
·
1 Parent(s): cfe6ce9

Update imports (remove MedicalReranker)

Browse files
models/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (351 Bytes)
 
models/__pycache__/llama.cpython-311.pyc DELETED
Binary file (8.51 kB)
 
models/__pycache__/summarizer.cpython-311.pyc DELETED
Binary file (13.4 kB)
 
search/engines/duckduckgo.py CHANGED
@@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
3
  import logging
4
  from typing import List, Dict
5
  import time
6
- from models.reranker import MedicalReranker
7
 
8
  logger = logging.getLogger(__name__)
9
 
@@ -21,7 +21,7 @@ class DuckDuckGoEngine:
21
  'Upgrade-Insecure-Requests': '1',
22
  })
23
  self.timeout = timeout
24
- self.reranker = MedicalReranker()
25
 
26
  def search(self, query: str, num_results: int = 10) -> List[Dict]:
27
  """Search with multiple DuckDuckGo strategies and medical focus"""
@@ -74,29 +74,12 @@ class DuckDuckGoEngine:
74
  filtered_results = self._filter_irrelevant_sources(results)
75
  logger.info(f"Filtered {len(results)} results to {len(filtered_results)} relevant results")
76
 
77
- # If we have results, use reranker; otherwise return what we have
78
  if filtered_results:
79
- try:
80
- reranked_results = self.reranker.rerank_results(clean_query, filtered_results, min_score)
81
- logger.info(f"Reranked {len(filtered_results)} results to {len(reranked_results)} high-quality results")
82
-
83
- # If reranking filtered out too many results, be more lenient
84
- if len(reranked_results) < min(3, num_results) and len(filtered_results) > 0:
85
- logger.warning(f"Reranking too strict ({len(reranked_results)} results), using fallback with lower threshold")
86
- # Try with even lower threshold
87
- fallback_results = self.reranker.rerank_results(clean_query, filtered_results, 0.05)
88
- if len(fallback_results) > len(reranked_results):
89
- return fallback_results[:num_results]
90
- else:
91
- # Last resort: return original filtered results with basic scoring
92
- for i, result in enumerate(filtered_results[:num_results]):
93
- result['composite_score'] = 0.5 - (i * 0.05) # Decreasing score
94
- return filtered_results[:num_results]
95
-
96
- return reranked_results[:num_results]
97
- except Exception as e:
98
- logger.warning(f"Reranking failed: {e}, returning filtered results")
99
- return filtered_results[:num_results]
100
 
101
  return filtered_results[:num_results]
102
 
 
3
  import logging
4
  from typing import List, Dict
5
  import time
6
+ # Reranker removed - using simple relevance scoring for cooking content
7
 
8
  logger = logging.getLogger(__name__)
9
 
 
21
  'Upgrade-Insecure-Requests': '1',
22
  })
23
  self.timeout = timeout
24
+ self.reranker = None # No complex reranking needed for cooking content
25
 
26
  def search(self, query: str, num_results: int = 10) -> List[Dict]:
27
  """Search with multiple DuckDuckGo strategies and medical focus"""
 
74
  filtered_results = self._filter_irrelevant_sources(results)
75
  logger.info(f"Filtered {len(results)} results to {len(filtered_results)} relevant results")
76
 
77
+ # Simple cooking relevance scoring
78
  if filtered_results:
79
+ # Add basic position-based scoring (score decreases with result rank)
80
+ for i, result in enumerate(filtered_results[:num_results]):
81
+ result['composite_score'] = 0.8 - (i * 0.05) # Decreasing score
82
+ return filtered_results[:num_results]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  return filtered_results[:num_results]
85
 
search/engines/video.py CHANGED
@@ -5,12 +5,12 @@ from typing import List, Dict
5
  import time
6
  import re
7
  from urllib.parse import urlparse, quote
8
- from models.reranker import MedicalReranker
9
 
10
  logger = logging.getLogger(__name__)
11
 
12
  class VideoSearchEngine:
13
- """Search engine for medical videos across multiple platforms"""
14
 
15
  def __init__(self, timeout: int = 15):
16
  self.session = requests.Session()
@@ -22,7 +22,7 @@ class VideoSearchEngine:
22
  'Connection': 'keep-alive',
23
  })
24
  self.timeout = timeout
25
- self.reranker = MedicalReranker()
26
 
27
  # Video platforms by language
28
  self.video_platforms = {
@@ -216,13 +216,21 @@ class VideoSearchEngine:
216
  except Exception as e:
217
  logger.warning(f"Fallback video search failed: {e}")
218
 
219
- # Use reranker to improve quality and relevance
220
  if results:
221
- reranked_results = self.reranker.filter_youtube_results(results, query)
222
- logger.info(f"Reranked {len(results)} video results to {len(reranked_results)} high-quality results")
223
- return reranked_results[:num_results]
 
 
 
 
 
 
 
 
224
 
225
- logger.info(f"Found {len(results)} medical video results")
226
  return results[:num_results]
227
 
228
  def _search_platform(self, query: str, platform: Dict, num_results: int) -> List[Dict]:
 
5
  import time
6
  import re
7
  from urllib.parse import urlparse, quote
8
+ # Reranker removed - using simple relevance scoring for cooking content
9
 
10
  logger = logging.getLogger(__name__)
11
 
12
  class VideoSearchEngine:
13
+ """Search engine for cooking videos across multiple platforms"""
14
 
15
  def __init__(self, timeout: int = 15):
16
  self.session = requests.Session()
 
22
  'Connection': 'keep-alive',
23
  })
24
  self.timeout = timeout
25
+ self.reranker = None # No complex reranking needed for cooking content
26
 
27
  # Video platforms by language
28
  self.video_platforms = {
 
216
  except Exception as e:
217
  logger.warning(f"Fallback video search failed: {e}")
218
 
219
+ # Simple cooking relevance filtering
220
  if results:
221
+ # Filter for cooking relevance
222
+ cooking_keywords = ['recipe', 'cooking', 'baking', 'food', 'ingredient', 'kitchen', 'chef', 'meal', 'dish', 'cuisine', 'cook', 'bake', 'roast', 'grill', 'fry', 'boil', 'steam', 'season', 'spice', 'herb', 'sauce', 'marinade', 'dressing']
223
+ relevant_results = []
224
+ for result in results:
225
+ title = result.get('title', '').lower()
226
+ if any(keyword in title for keyword in cooking_keywords):
227
+ relevant_results.append(result)
228
+
229
+ if relevant_results:
230
+ results = relevant_results
231
+ logger.info(f"Filtered to {len(results)} cooking-relevant video results")
232
 
233
+ logger.info(f"Found {len(results)} cooking video results")
234
  return results[:num_results]
235
 
236
  def _search_platform(self, query: str, platform: Dict, num_results: int) -> List[Dict]: