Nymbo committed (verified)
Commit 24f0172 · 1 parent: 40db652

Web_Search: 4 new free search engine backends, new date filters, disabling safesearch by default
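
A minimal sketch of the updated call surface (parameter names come from the diff below; the import path, the query, and the printing are illustrative assumptions, not part of this commit):

    # Hypothetical usage of the updated tool; Modules.Web_Search is assumed to be importable as shown.
    from Modules.Web_Search import Web_Search

    # backend="auto" tries DuckDuckGo first and then appends the other free engines;
    # date_filter="week" maps to the DDGS timelimit "w"; safesearch is disabled by default per this commit.
    print(
        Web_Search(
            query="open source search engines",
            max_results=5,
            search_type="news",
            backend="auto",
            date_filter="week",
        )
    )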

Files changed (1)
  1. Modules/Web_Search.py +233 -13
Modules/Web_Search.py CHANGED
@@ -19,6 +19,80 @@ TOOL_SUMMARY = (
 
 _SAFESEARCH_LEVEL = "off"
 
+# Defaults and choices for newly added parameters
+BACKEND_CHOICES = [
+    "auto",
+    "duckduckgo",
+    "bing",
+    "brave",
+    "yahoo",
+    "wikipedia",
+]
+
+# Allowed backends per type (explicit selection set)
+_ALLOWED_BACKENDS = {
+    "text": ["duckduckgo", "bing", "brave", "yahoo", "wikipedia"],
+    "news": ["duckduckgo", "bing", "yahoo"],
+    "images": ["duckduckgo"],
+    "videos": ["duckduckgo"],
+    "books": ["annasarchive"],
+}
+
+# Auto order per type (used when backend == "auto"); wikipedia excluded for text
+_AUTO_ORDER = {
+    "text": ["duckduckgo", "bing", "brave", "yahoo"],
+    "news": ["duckduckgo", "bing", "yahoo"],
+    "images": ["duckduckgo"],
+    "videos": ["duckduckgo"],
+    "books": ["annasarchive"],
+}
+
+# Date filter choices: canonical values used by resolver
+DATE_FILTER_CHOICES = ["any", "day", "week", "month", "year"]
+
+
+def _resolve_backend(search_type: str, backend_choice: str) -> str:
+    """Resolve backend string for DDGS based on search type and user choice.
+
+    - If backend_choice is "auto", return a comma-separated fallback order for that type.
+    - If backend_choice is not supported by the type, fall back to the first allowed backend.
+    - Books endpoint uses only 'annasarchive'.
+    """
+    stype = search_type if search_type in _ALLOWED_BACKENDS else "text"
+    allowed = _ALLOWED_BACKENDS[stype]
+    if backend_choice == "auto":
+        return ", ".join(_AUTO_ORDER[stype])
+    if stype == "books":
+        return "annasarchive"
+    # Validate backend against allowed set for this type
+    if backend_choice in allowed:
+        return backend_choice
+    # Fallback to first allowed backend
+    return allowed[0]
+
+
+def _resolve_timelimit(date_filter: str, search_type: str) -> str | None:
+    """Map UI date filter to DDGS timelimit code per endpoint.
+
+    Returns one of: None, 'd', 'w', 'm', 'y'. For news/videos (which support d/w/m),
+    selecting 'year' will coerce to 'm' to stay within supported range.
+    """
+    normalized = (date_filter or "any").strip().lower()
+    if normalized in ("any", "none", ""):
+        return None
+    mapping = {
+        "day": "d",
+        "week": "w",
+        "month": "m",
+        "year": "y",
+    }
+    code = mapping.get(normalized)
+    if not code:
+        return None
+    if search_type in ("news", "videos") and code == "y":
+        return "m"
+    return code
+
 
 def _extract_date_from_snippet(snippet: str) -> str:
     if not snippet:
@@ -116,10 +190,21 @@ def Web_Search(
     query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
     max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
     page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
-    search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
     offset: Annotated[int, "Result offset to start from (overrides page if > 0, for precise continuation)."] = 0,
+    search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
+    backend: Annotated[str, "Search backend or ordered fallbacks. Use 'auto' for recommended order."] = "auto",
+    date_filter: Annotated[str, "Time filter: any, day, week, month, year."] = "any",
 ) -> str:
-    _log_call_start("Web_Search", query=query, max_results=max_results, page=page, search_type=search_type, offset=offset)
+    _log_call_start(
+        "Web_Search",
+        query=query,
+        max_results=max_results,
+        page=page,
+        search_type=search_type,
+        offset=offset,
+        backend=backend,
+        date_filter=date_filter,
+    )
     if not query or not query.strip():
         result = "No search query provided. Please enter a search term."
         _log_call_end("Web_Search", _truncate_for_log(result))
@@ -139,21 +224,144 @@ def Web_Search(
     total_needed = actual_offset + max_results
     used_fallback = False
     original_search_type = search_type
+    # Prepare cross-cutting parameters
+    resolved_backend = _resolve_backend(search_type, (backend or "auto").lower())
+    timelimit = _resolve_timelimit(date_filter, search_type)
 
     def _perform_search(stype: str) -> list[dict]:
         try:
             _search_rate_limiter.acquire()
             with DDGS() as ddgs:
                 if stype == "text":
-                    raw_gen = ddgs.text(query, max_results=total_needed + 10, safesearch=_SAFESEARCH_LEVEL)
+                    user_backend_choice = (backend or "auto").lower()
+                    if user_backend_choice == "auto":
+                        # Custom auto: DDG first, then append other engines
+                        results: list[dict] = []
+                        seen: set[str] = set()
+
+                        def add_unique(items: list[dict], key_field: str) -> None:
+                            for it in items or []:
+                                url = (it.get(key_field, "") or "").strip()
+                                if url and url not in seen:
+                                    seen.add(url)
+                                    results.append(it)
+
+                        # First: duckduckgo
+                        try:
+                            ddg_items = list(
+                                ddgs.text(
+                                    query,
+                                    max_results=total_needed + 10,
+                                    safesearch=_SAFESEARCH_LEVEL,
+                                    timelimit=timelimit,
+                                    backend="duckduckgo",
+                                )
+                            )
+                        except Exception:
+                            ddg_items = []
+                        add_unique(ddg_items, "href")
+
+                        # Then: other engines appended (excluding duckduckgo)
+                        for eng in [b for b in _AUTO_ORDER["text"] if b != "duckduckgo"]:
+                            try:
+                                extra = list(
+                                    ddgs.text(
+                                        query,
+                                        max_results=total_needed + 10,
+                                        safesearch=_SAFESEARCH_LEVEL,
+                                        timelimit=timelimit,
+                                        backend=eng,
+                                    )
+                                )
+                            except Exception:
+                                extra = []
+                            add_unique(extra, "href")
+
+                        return results
+                    else:
+                        raw_gen = ddgs.text(
+                            query,
+                            max_results=total_needed + 10,
+                            safesearch=_SAFESEARCH_LEVEL,
+                            timelimit=timelimit,
+                            backend=resolved_backend,
+                        )
                 elif stype == "news":
-                    raw_gen = ddgs.news(query, max_results=total_needed + 10, safesearch=_SAFESEARCH_LEVEL)
+                    user_backend_choice = (backend or "auto").lower()
+                    if user_backend_choice == "auto":
+                        # Custom auto: DDG first, then append other engines
+                        results: list[dict] = []
+                        seen: set[str] = set()
+
+                        def add_unique(items: list[dict], key_field: str) -> None:
+                            for it in items or []:
+                                url = (it.get(key_field, "") or "").strip()
+                                if url and url not in seen:
+                                    seen.add(url)
+                                    results.append(it)
+
+                        # First: duckduckgo news
+                        try:
+                            ddg_news = list(
+                                ddgs.news(
+                                    query,
+                                    max_results=total_needed + 10,
+                                    safesearch=_SAFESEARCH_LEVEL,
+                                    timelimit=timelimit,
+                                    backend="duckduckgo",
+                                )
+                            )
+                        except Exception:
+                            ddg_news = []
+                        add_unique(ddg_news, "url")
+
+                        # Then: other news engines appended
+                        for eng in [b for b in _AUTO_ORDER["news"] if b != "duckduckgo"]:
+                            try:
+                                extra = list(
+                                    ddgs.news(
+                                        query,
+                                        max_results=total_needed + 10,
+                                        safesearch=_SAFESEARCH_LEVEL,
+                                        timelimit=timelimit,
+                                        backend=eng,
+                                    )
+                                )
+                            except Exception:
+                                extra = []
+                            add_unique(extra, "url")
+
+                        return results
+                    else:
+                        raw_gen = ddgs.news(
+                            query,
+                            max_results=total_needed + 10,
+                            safesearch=_SAFESEARCH_LEVEL,
+                            timelimit=timelimit,
+                            backend=_resolve_backend("news", (backend or "auto").lower()),
+                        )
                 elif stype == "images":
-                    raw_gen = ddgs.images(query, max_results=total_needed + 10, safesearch=_SAFESEARCH_LEVEL)
+                    raw_gen = ddgs.images(
+                        query,
+                        max_results=total_needed + 10,
+                        safesearch=_SAFESEARCH_LEVEL,
+                        timelimit=timelimit,
+                        backend=_resolve_backend("images", (backend or "auto").lower()),
+                    )
                 elif stype == "videos":
-                    raw_gen = ddgs.videos(query, max_results=total_needed + 10, safesearch=_SAFESEARCH_LEVEL)
+                    raw_gen = ddgs.videos(
+                        query,
+                        max_results=total_needed + 10,
+                        safesearch=_SAFESEARCH_LEVEL,
+                        timelimit=timelimit,
+                        backend=_resolve_backend("videos", (backend or "auto").lower()),
+                    )
                 else:
-                    raw_gen = ddgs.books(query, max_results=total_needed + 10, safesearch=_SAFESEARCH_LEVEL)
+                    raw_gen = ddgs.books(
+                        query,
+                        max_results=total_needed + 10,
+                        backend=_resolve_backend("books", (backend or "auto").lower()),
+                    )
                 try:
                     return list(raw_gen)
                 except Exception as inner_exc:
@@ -242,12 +450,6 @@ def build_interface() -> gr.Interface:
             gr.Textbox(label="Query", placeholder="topic OR site:example.com", max_lines=1),
             gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
             gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination (ignored if offset > 0)"),
-            gr.Radio(
-                label="Search Type",
-                choices=["text", "news", "images", "videos", "books"],
-                value="text",
-                info="Type of content to search for",
-            ),
             gr.Slider(
                 minimum=0,
                 maximum=1000,
@@ -256,6 +458,24 @@
                 label="Offset",
                 info="Result offset to start from (overrides page if > 0, use next_offset from previous search)",
             ),
+            gr.Radio(
+                label="Search Type",
+                choices=["text", "news", "images", "videos", "books"],
+                value="text",
+                info="Type of content to search for",
+            ),
+            gr.Radio(
+                label="Backend",
+                choices=BACKEND_CHOICES,
+                value="auto",
+                info="Search engine backend or fallback order (auto applies recommended order)",
+            ),
+            gr.Radio(
+                label="Date filter",
+                choices=DATE_FILTER_CHOICES,
+                value="any",
+                info="Limit results to: day, week, month, or year (varies by type)",
+            ),
         ],
         outputs=gr.Textbox(label="Search Results", interactive=False, lines=20, max_lines=20),
         title="Web Search",
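
For a quick sense of how the two new helpers behave, a small illustrative check (the import path is an assumption; the expected values simply mirror the resolver logic in the diff above):

    # Hypothetical sanity checks for the resolver helpers added in this commit.
    from Modules.Web_Search import _resolve_backend, _resolve_timelimit  # assumed import path

    assert _resolve_backend("text", "auto") == "duckduckgo, bing, brave, yahoo"  # auto -> ordered fallback string
    assert _resolve_backend("news", "brave") == "duckduckgo"  # unsupported choice falls back to first allowed
    assert _resolve_backend("books", "bing") == "annasarchive"  # books always use annasarchive
    assert _resolve_timelimit("any", "text") is None  # "any" disables the time filter
    assert _resolve_timelimit("week", "text") == "w"  # canonical mapping to DDGS timelimit codes
    assert _resolve_timelimit("year", "news") == "m"  # news/videos coerce "year" down to "m"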