Nymbo committed on
Commit
1a73c50
·
verified ·
1 Parent(s): cacc654

adding new search types: News, Images, Videos, Books

Browse files
Files changed (1) hide show
  1. app.py +130 -50
app.py CHANGED
@@ -488,13 +488,99 @@ def _extract_date_from_snippet(snippet: str) -> str:
488
  return ""
489
 
490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
  def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
492
  query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
493
  max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
494
  page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
 
495
  ) -> str:
496
  """
497
- Run a DuckDuckGo search and return numbered results with URLs, titles, snippets, and dates.
498
 
499
  Args:
500
  query (str): The search query string. Supports operators like site:, quotes for exact matching,
@@ -506,12 +592,17 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
506
  - Exclude terms: "cats -dogs"
507
  max_results (int): Number of results to return per page (1–20). Default: 5.
508
  page (int): Page number for pagination (1-based). Default: 1.
 
 
 
 
 
 
509
 
510
  Returns:
511
- str: Search results in readable format with titles, URLs, snippets, and publication dates
512
- when available, formatted as a numbered list with pagination info.
513
  """
514
- _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page)
515
  if not query or not query.strip():
516
  result = "No search query provided. Please enter a search term."
517
  _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
@@ -520,6 +611,9 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
520
  # Validate parameters
521
  max_results = max(1, min(20, max_results))
522
  page = max(1, page)
 
 
 
523
 
524
  # Calculate offset for pagination
525
  offset = (page - 1) * max_results
@@ -529,10 +623,19 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
529
  # Apply rate limiting to avoid being blocked
530
  _search_rate_limiter.acquire()
531
 
532
- # Perform search with timeout handling
533
- # We need to get more results than needed for pagination
534
  with DDGS() as ddgs:
535
- raw_gen = ddgs.text(query, max_results=total_needed + 10) # Get extra for safety
 
 
 
 
 
 
 
 
 
 
536
  raw = list(raw_gen)
537
 
538
  except Exception as e:
@@ -548,7 +651,7 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
548
  return result
549
 
550
  if not raw:
551
- result = f"No results found for query: {query}"
552
  _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
553
  return result
554
 
@@ -556,51 +659,21 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
556
  paginated_results = raw[offset:offset + max_results]
557
 
558
  if not paginated_results:
559
- result = f"No results found on page {page} for query: {query}. Try page 1 or reduce page number."
560
  _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
561
  return result
562
 
563
- results = []
564
- for r in paginated_results:
565
- title = (r.get("title") or "").strip()
566
- url = (r.get("href") or r.get("link") or "").strip()
567
- body = (r.get("body") or r.get("snippet") or "").strip()
568
-
569
- if not url:
570
- continue
571
-
572
- # Extract date from snippet
573
- date_found = _extract_date_from_snippet(body)
574
-
575
- result_obj = {
576
- "title": title or _domain_of(url),
577
- "url": url,
578
- "snippet": body,
579
- "date": date_found
580
- }
581
-
582
- results.append(result_obj)
583
-
584
- if not results:
585
- result = f"No valid results found on page {page} for query: {query}"
586
- _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
587
- return result
588
-
589
- # Format output in readable format with pagination info
590
  total_available = len(raw)
591
  start_num = offset + 1
592
- end_num = offset + len(results)
593
 
594
- lines = [f"Search results for: {query}"]
595
  lines.append(f"Page {page} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
596
 
597
- for i, result in enumerate(results, start_num):
598
- lines.append(f"{i}. {result['title']}")
599
- lines.append(f" URL: {result['url']}")
600
- if result['snippet']:
601
- lines.append(f" Summary: {result['snippet']}")
602
- if result['date']:
603
- lines.append(f" Date: {result['date']}")
604
  lines.append("") # Empty line between results
605
 
606
  # Add pagination hint
@@ -608,7 +681,7 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
608
  lines.append(f"💡 More results available - use page={page + 1} to see next {max_results} results")
609
 
610
  result = "\n".join(lines)
611
- _log_call_end("Search_DuckDuckGo", f"page={page} results={len(results)} chars={len(result)}")
612
  return result
613
 
614
 
@@ -1138,19 +1211,26 @@ concise_interface = gr.Interface(
1138
  gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
1139
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
1140
  gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
 
 
 
 
 
 
1141
  ],
1142
  outputs=gr.Textbox(label="Search Results", interactive=False),
1143
  title="DuckDuckGo Search",
1144
  description=(
1145
- "<div style=\"text-align:center\">Web search with readable output format, date detection, and pagination support. Supports advanced search operators.</div>"
1146
  ),
1147
  api_description=(
1148
- "Run a DuckDuckGo search and return numbered results with URLs, titles, summaries, and publication dates when detectable. "
1149
  "Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
1150
  "OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
1151
  "'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
1152
- "Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination). "
1153
- "Returns formatted results with date metadata and pagination hints for accessing more results."
 
1154
  ),
1155
  flagging_mode="never",
1156
  submit_btn="Search",
 
488
  return ""
489
 
490
 
491
+ def _format_search_result(result: dict, search_type: str, index: int) -> list[str]:
492
+ """
493
+ Format a single search result based on the search type.
494
+ Returns a list of strings to be joined with newlines.
495
+ """
496
+ lines = []
497
+
498
+ if search_type == "text":
499
+ title = result.get("title", "").strip()
500
+ url = result.get("href", "").strip()
501
+ snippet = result.get("body", "").strip()
502
+ date = _extract_date_from_snippet(snippet)
503
+
504
+ lines.append(f"{index}. {title}")
505
+ lines.append(f" URL: {url}")
506
+ if snippet:
507
+ lines.append(f" Summary: {snippet}")
508
+ if date:
509
+ lines.append(f" Date: {date}")
510
+
511
+ elif search_type == "news":
512
+ title = result.get("title", "").strip()
513
+ url = result.get("url", "").strip()
514
+ body = result.get("body", "").strip()
515
+ date = result.get("date", "").strip()
516
+ source = result.get("source", "").strip()
517
+
518
+ lines.append(f"{index}. {title}")
519
+ lines.append(f" URL: {url}")
520
+ if source:
521
+ lines.append(f" Source: {source}")
522
+ if date:
523
+ lines.append(f" Date: {date}")
524
+ if body:
525
+ lines.append(f" Summary: {body}")
526
+
527
+ elif search_type == "images":
528
+ title = result.get("title", "").strip()
529
+ image_url = result.get("image", "").strip()
530
+ source_url = result.get("url", "").strip()
531
+ source = result.get("source", "").strip()
532
+ width = result.get("width", "")
533
+ height = result.get("height", "")
534
+
535
+ lines.append(f"{index}. {title}")
536
+ lines.append(f" Image: {image_url}")
537
+ lines.append(f" Source: {source_url}")
538
+ if source:
539
+ lines.append(f" Publisher: {source}")
540
+ if width and height:
541
+ lines.append(f" Dimensions: {width}x{height}")
542
+
543
+ elif search_type == "videos":
544
+ title = result.get("title", "").strip()
545
+ description = result.get("description", "").strip()
546
+ duration = result.get("duration", "").strip()
547
+ published = result.get("published", "").strip()
548
+ uploader = result.get("uploader", "").strip()
549
+ embed_url = result.get("embed_url", "").strip()
550
+
551
+ lines.append(f"{index}. {title}")
552
+ if embed_url:
553
+ lines.append(f" Video: {embed_url}")
554
+ if uploader:
555
+ lines.append(f" Uploader: {uploader}")
556
+ if duration:
557
+ lines.append(f" Duration: {duration}")
558
+ if published:
559
+ lines.append(f" Published: {published}")
560
+ if description:
561
+ lines.append(f" Description: {description}")
562
+
563
+ elif search_type == "books":
564
+ title = result.get("title", "").strip()
565
+ url = result.get("url", "").strip()
566
+ body = result.get("body", "").strip()
567
+
568
+ lines.append(f"{index}. {title}")
569
+ lines.append(f" URL: {url}")
570
+ if body:
571
+ lines.append(f" Description: {body}")
572
+
573
+ return lines
574
+
575
+
576
  def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
577
  query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
578
  max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
579
  page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
580
+ search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
581
  ) -> str:
582
  """
583
+ Run a DuckDuckGo search and return formatted results with support for multiple content types.
584
 
585
  Args:
586
  query (str): The search query string. Supports operators like site:, quotes for exact matching,
 
592
  - Exclude terms: "cats -dogs"
593
  max_results (int): Number of results to return per page (1–20). Default: 5.
594
  page (int): Page number for pagination (1-based). Default: 1.
595
+ search_type (str): Type of search to perform:
596
+ - "text": Web pages (default)
597
+ - "news": News articles with dates and sources
598
+ - "images": Image results with dimensions and sources
599
+ - "videos": Video results with duration and upload info
600
+ - "books": Book search results
601
 
602
  Returns:
603
+ str: Search results formatted appropriately for the search type, with pagination info.
 
604
  """
605
+ _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page, search_type=search_type)
606
  if not query or not query.strip():
607
  result = "No search query provided. Please enter a search term."
608
  _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
 
611
  # Validate parameters
612
  max_results = max(1, min(20, max_results))
613
  page = max(1, page)
614
+ valid_types = ["text", "news", "images", "videos", "books"]
615
+ if search_type not in valid_types:
616
+ search_type = "text"
617
 
618
  # Calculate offset for pagination
619
  offset = (page - 1) * max_results
 
623
  # Apply rate limiting to avoid being blocked
624
  _search_rate_limiter.acquire()
625
 
626
+ # Perform search with timeout handling based on search type
 
627
  with DDGS() as ddgs:
628
+ if search_type == "text":
629
+ raw_gen = ddgs.text(query, max_results=total_needed + 10)
630
+ elif search_type == "news":
631
+ raw_gen = ddgs.news(query, max_results=total_needed + 10)
632
+ elif search_type == "images":
633
+ raw_gen = ddgs.images(query, max_results=total_needed + 10)
634
+ elif search_type == "videos":
635
+ raw_gen = ddgs.videos(query, max_results=total_needed + 10)
636
+ elif search_type == "books":
637
+ raw_gen = ddgs.books(query, max_results=total_needed + 10)
638
+
639
  raw = list(raw_gen)
640
 
641
  except Exception as e:
 
651
  return result
652
 
653
  if not raw:
654
+ result = f"No {search_type} results found for query: {query}"
655
  _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
656
  return result
657
 
 
659
  paginated_results = raw[offset:offset + max_results]
660
 
661
  if not paginated_results:
662
+ result = f"No {search_type} results found on page {page} for query: {query}. Try page 1 or reduce page number."
663
  _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
664
  return result
665
 
666
+ # Format results based on search type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
  total_available = len(raw)
668
  start_num = offset + 1
669
+ end_num = offset + len(paginated_results)
670
 
671
+ lines = [f"{search_type.title()} search results for: {query}"]
672
  lines.append(f"Page {page} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
673
 
674
+ for i, result in enumerate(paginated_results, start_num):
675
+ result_lines = _format_search_result(result, search_type, i)
676
+ lines.extend(result_lines)
 
 
 
 
677
  lines.append("") # Empty line between results
678
 
679
  # Add pagination hint
 
681
  lines.append(f"💡 More results available - use page={page + 1} to see next {max_results} results")
682
 
683
  result = "\n".join(lines)
684
+ _log_call_end("Search_DuckDuckGo", f"type={search_type} page={page} results={len(paginated_results)} chars={len(result)}")
685
  return result
686
 
687
 
 
1211
  gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
1212
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
1213
  gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
1214
+ gr.Radio(
1215
+ label="Search Type",
1216
+ choices=["text", "news", "images", "videos", "books"],
1217
+ value="text",
1218
+ info="Type of content to search for"
1219
+ ),
1220
  ],
1221
  outputs=gr.Textbox(label="Search Results", interactive=False),
1222
  title="DuckDuckGo Search",
1223
  description=(
1224
+ "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and pagination. Supports text, news, images, videos, and books.</div>"
1225
  ),
1226
  api_description=(
1227
+ "Run a DuckDuckGo search with support for multiple content types and return formatted results. "
1228
  "Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
1229
  "OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
1230
  "'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
1231
+ "Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination), "
1232
+ "search_type (str: text/news/images/videos/books). "
1233
+ "Returns appropriately formatted results with metadata and pagination hints for each content type."
1234
  ),
1235
  flagging_mode="never",
1236
  submit_btn="Search",