Spaces:
Sleeping
Sleeping
Commit
·
b95b076
1
Parent(s):
cfe6ce9
Update imports (remove reranker usage; delete cached .pyc files)
Browse files
models/__pycache__/__init__.cpython-311.pyc
DELETED
|
Binary file (351 Bytes)
|
|
|
models/__pycache__/llama.cpython-311.pyc
DELETED
|
Binary file (8.51 kB)
|
|
|
models/__pycache__/summarizer.cpython-311.pyc
DELETED
|
Binary file (13.4 kB)
|
|
|
search/engines/duckduckgo.py
CHANGED
|
@@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
|
|
| 3 |
import logging
|
| 4 |
from typing import List, Dict
|
| 5 |
import time
|
| 6 |
-
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
|
@@ -21,7 +21,7 @@ class DuckDuckGoEngine:
|
|
| 21 |
'Upgrade-Insecure-Requests': '1',
|
| 22 |
})
|
| 23 |
self.timeout = timeout
|
| 24 |
-
self.reranker =
|
| 25 |
|
| 26 |
def search(self, query: str, num_results: int = 10) -> List[Dict]:
|
| 27 |
"""Search with multiple DuckDuckGo strategies and medical focus"""
|
|
@@ -74,29 +74,12 @@ class DuckDuckGoEngine:
|
|
| 74 |
filtered_results = self._filter_irrelevant_sources(results)
|
| 75 |
logger.info(f"Filtered {len(results)} results to {len(filtered_results)} relevant results")
|
| 76 |
|
| 77 |
-
#
|
| 78 |
if filtered_results:
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
# If reranking filtered out too many results, be more lenient
|
| 84 |
-
if len(reranked_results) < min(3, num_results) and len(filtered_results) > 0:
|
| 85 |
-
logger.warning(f"Reranking too strict ({len(reranked_results)} results), using fallback with lower threshold")
|
| 86 |
-
# Try with even lower threshold
|
| 87 |
-
fallback_results = self.reranker.rerank_results(clean_query, filtered_results, 0.05)
|
| 88 |
-
if len(fallback_results) > len(reranked_results):
|
| 89 |
-
return fallback_results[:num_results]
|
| 90 |
-
else:
|
| 91 |
-
# Last resort: return original filtered results with basic scoring
|
| 92 |
-
for i, result in enumerate(filtered_results[:num_results]):
|
| 93 |
-
result['composite_score'] = 0.5 - (i * 0.05) # Decreasing score
|
| 94 |
-
return filtered_results[:num_results]
|
| 95 |
-
|
| 96 |
-
return reranked_results[:num_results]
|
| 97 |
-
except Exception as e:
|
| 98 |
-
logger.warning(f"Reranking failed: {e}, returning filtered results")
|
| 99 |
-
return filtered_results[:num_results]
|
| 100 |
|
| 101 |
return filtered_results[:num_results]
|
| 102 |
|
|
|
|
| 3 |
import logging
|
| 4 |
from typing import List, Dict
|
| 5 |
import time
|
| 6 |
+
# Reranker removed - using simple relevance scoring for cooking content
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
|
|
|
| 21 |
'Upgrade-Insecure-Requests': '1',
|
| 22 |
})
|
| 23 |
self.timeout = timeout
|
| 24 |
+
self.reranker = None # No complex reranking needed for cooking content
|
| 25 |
|
| 26 |
def search(self, query: str, num_results: int = 10) -> List[Dict]:
|
| 27 |
"""Search with multiple DuckDuckGo strategies and medical focus"""
|
|
|
|
| 74 |
filtered_results = self._filter_irrelevant_sources(results)
|
| 75 |
logger.info(f"Filtered {len(results)} results to {len(filtered_results)} relevant results")
|
| 76 |
|
| 77 |
+
# Simple position-based scoring (reranker removed; no content-based relevance check here)
|
| 78 |
if filtered_results:
|
| 79 |
+
# Assign a basic score from result order only: earlier results score higher
|
| 80 |
+
for i, result in enumerate(filtered_results[:num_results]):
|
| 81 |
+
result['composite_score'] = 0.8 - (i * 0.05) # Decreasing score
|
| 82 |
+
return filtered_results[:num_results]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
return filtered_results[:num_results]
|
| 85 |
|
search/engines/video.py
CHANGED
|
@@ -5,12 +5,12 @@ from typing import List, Dict
|
|
| 5 |
import time
|
| 6 |
import re
|
| 7 |
from urllib.parse import urlparse, quote
|
| 8 |
-
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
class VideoSearchEngine:
|
| 13 |
-
"""Search engine for
|
| 14 |
|
| 15 |
def __init__(self, timeout: int = 15):
|
| 16 |
self.session = requests.Session()
|
|
@@ -22,7 +22,7 @@ class VideoSearchEngine:
|
|
| 22 |
'Connection': 'keep-alive',
|
| 23 |
})
|
| 24 |
self.timeout = timeout
|
| 25 |
-
self.reranker =
|
| 26 |
|
| 27 |
# Video platforms by language
|
| 28 |
self.video_platforms = {
|
|
@@ -216,13 +216,21 @@ class VideoSearchEngine:
|
|
| 216 |
except Exception as e:
|
| 217 |
logger.warning(f"Fallback video search failed: {e}")
|
| 218 |
|
| 219 |
-
#
|
| 220 |
if results:
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
logger.info(f"Found {len(results)}
|
| 226 |
return results[:num_results]
|
| 227 |
|
| 228 |
def _search_platform(self, query: str, platform: Dict, num_results: int) -> List[Dict]:
|
|
|
|
| 5 |
import time
|
| 6 |
import re
|
| 7 |
from urllib.parse import urlparse, quote
|
| 8 |
+
# Reranker removed - using simple relevance scoring for cooking content
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
class VideoSearchEngine:
|
| 13 |
+
"""Search engine for cooking videos across multiple platforms"""
|
| 14 |
|
| 15 |
def __init__(self, timeout: int = 15):
|
| 16 |
self.session = requests.Session()
|
|
|
|
| 22 |
'Connection': 'keep-alive',
|
| 23 |
})
|
| 24 |
self.timeout = timeout
|
| 25 |
+
self.reranker = None # No complex reranking needed for cooking content
|
| 26 |
|
| 27 |
# Video platforms by language
|
| 28 |
self.video_platforms = {
|
|
|
|
| 216 |
except Exception as e:
|
| 217 |
logger.warning(f"Fallback video search failed: {e}")
|
| 218 |
|
| 219 |
+
# Simple cooking relevance filtering
|
| 220 |
if results:
|
| 221 |
+
# Filter for cooking relevance
|
| 222 |
+
cooking_keywords = ['recipe', 'cooking', 'baking', 'food', 'ingredient', 'kitchen', 'chef', 'meal', 'dish', 'cuisine', 'cook', 'bake', 'roast', 'grill', 'fry', 'boil', 'steam', 'season', 'spice', 'herb', 'sauce', 'marinade', 'dressing']
|
| 223 |
+
relevant_results = []
|
| 224 |
+
for result in results:
|
| 225 |
+
title = result.get('title', '').lower()
|
| 226 |
+
if any(keyword in title for keyword in cooking_keywords):
|
| 227 |
+
relevant_results.append(result)
|
| 228 |
+
|
| 229 |
+
if relevant_results:
|
| 230 |
+
results = relevant_results
|
| 231 |
+
logger.info(f"Filtered to {len(results)} cooking-relevant video results")
|
| 232 |
|
| 233 |
+
logger.info(f"Found {len(results)} cooking video results")
|
| 234 |
return results[:num_results]
|
| 235 |
|
| 236 |
def _search_platform(self, query: str, platform: Dict, num_results: int) -> List[Dict]:
|