adding new search types: News, Images, Videos, Books
Browse files
app.py
CHANGED
|
@@ -488,13 +488,99 @@ def _extract_date_from_snippet(snippet: str) -> str:
|
|
| 488 |
return ""
|
| 489 |
|
| 490 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
| 492 |
query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
|
| 493 |
max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
|
| 494 |
page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
|
|
|
|
| 495 |
) -> str:
|
| 496 |
"""
|
| 497 |
-
Run a DuckDuckGo search and return
|
| 498 |
|
| 499 |
Args:
|
| 500 |
query (str): The search query string. Supports operators like site:, quotes for exact matching,
|
|
@@ -506,12 +592,17 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
|
| 506 |
- Exclude terms: "cats -dogs"
|
| 507 |
max_results (int): Number of results to return per page (1–20). Default: 5.
|
| 508 |
page (int): Page number for pagination (1-based). Default: 1.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
|
| 510 |
Returns:
|
| 511 |
-
str: Search results
|
| 512 |
-
when available, formatted as a numbered list with pagination info.
|
| 513 |
"""
|
| 514 |
-
_log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page)
|
| 515 |
if not query or not query.strip():
|
| 516 |
result = "No search query provided. Please enter a search term."
|
| 517 |
_log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
|
|
@@ -520,6 +611,9 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
|
| 520 |
# Validate parameters
|
| 521 |
max_results = max(1, min(20, max_results))
|
| 522 |
page = max(1, page)
|
|
|
|
|
|
|
|
|
|
| 523 |
|
| 524 |
# Calculate offset for pagination
|
| 525 |
offset = (page - 1) * max_results
|
|
@@ -529,10 +623,19 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
|
| 529 |
# Apply rate limiting to avoid being blocked
|
| 530 |
_search_rate_limiter.acquire()
|
| 531 |
|
| 532 |
-
# Perform search with timeout handling
|
| 533 |
-
# We need to get more results than needed for pagination
|
| 534 |
with DDGS() as ddgs:
|
| 535 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
raw = list(raw_gen)
|
| 537 |
|
| 538 |
except Exception as e:
|
|
@@ -548,7 +651,7 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
|
| 548 |
return result
|
| 549 |
|
| 550 |
if not raw:
|
| 551 |
-
result = f"No results found for query: {query}"
|
| 552 |
_log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
|
| 553 |
return result
|
| 554 |
|
|
@@ -556,51 +659,21 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
|
| 556 |
paginated_results = raw[offset:offset + max_results]
|
| 557 |
|
| 558 |
if not paginated_results:
|
| 559 |
-
result = f"No results found on page {page} for query: {query}. Try page 1 or reduce page number."
|
| 560 |
_log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
|
| 561 |
return result
|
| 562 |
|
| 563 |
-
results
|
| 564 |
-
for r in paginated_results:
|
| 565 |
-
title = (r.get("title") or "").strip()
|
| 566 |
-
url = (r.get("href") or r.get("link") or "").strip()
|
| 567 |
-
body = (r.get("body") or r.get("snippet") or "").strip()
|
| 568 |
-
|
| 569 |
-
if not url:
|
| 570 |
-
continue
|
| 571 |
-
|
| 572 |
-
# Extract date from snippet
|
| 573 |
-
date_found = _extract_date_from_snippet(body)
|
| 574 |
-
|
| 575 |
-
result_obj = {
|
| 576 |
-
"title": title or _domain_of(url),
|
| 577 |
-
"url": url,
|
| 578 |
-
"snippet": body,
|
| 579 |
-
"date": date_found
|
| 580 |
-
}
|
| 581 |
-
|
| 582 |
-
results.append(result_obj)
|
| 583 |
-
|
| 584 |
-
if not results:
|
| 585 |
-
result = f"No valid results found on page {page} for query: {query}"
|
| 586 |
-
_log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
|
| 587 |
-
return result
|
| 588 |
-
|
| 589 |
-
# Format output in readable format with pagination info
|
| 590 |
total_available = len(raw)
|
| 591 |
start_num = offset + 1
|
| 592 |
-
end_num = offset + len(
|
| 593 |
|
| 594 |
-
lines = [f"
|
| 595 |
lines.append(f"Page {page} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
|
| 596 |
|
| 597 |
-
for i, result in enumerate(
|
| 598 |
-
|
| 599 |
-
lines.
|
| 600 |
-
if result['snippet']:
|
| 601 |
-
lines.append(f" Summary: {result['snippet']}")
|
| 602 |
-
if result['date']:
|
| 603 |
-
lines.append(f" Date: {result['date']}")
|
| 604 |
lines.append("") # Empty line between results
|
| 605 |
|
| 606 |
# Add pagination hint
|
|
@@ -608,7 +681,7 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
|
| 608 |
lines.append(f"💡 More results available - use page={page + 1} to see next {max_results} results")
|
| 609 |
|
| 610 |
result = "\n".join(lines)
|
| 611 |
-
_log_call_end("Search_DuckDuckGo", f"page={page} results={len(
|
| 612 |
return result
|
| 613 |
|
| 614 |
|
|
@@ -1138,19 +1211,26 @@ concise_interface = gr.Interface(
|
|
| 1138 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
| 1139 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
| 1140 |
gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1141 |
],
|
| 1142 |
outputs=gr.Textbox(label="Search Results", interactive=False),
|
| 1143 |
title="DuckDuckGo Search",
|
| 1144 |
description=(
|
| 1145 |
-
"<div style=\"text-align:center\">
|
| 1146 |
),
|
| 1147 |
api_description=(
|
| 1148 |
-
"Run a DuckDuckGo search
|
| 1149 |
"Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
|
| 1150 |
"OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
|
| 1151 |
"'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
|
| 1152 |
-
"Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination)
|
| 1153 |
-
"
|
|
|
|
| 1154 |
),
|
| 1155 |
flagging_mode="never",
|
| 1156 |
submit_btn="Search",
|
|
|
|
| 488 |
return ""
|
| 489 |
|
| 490 |
|
| 491 |
+
def _format_search_result(result: dict, search_type: str, index: int) -> list[str]:
|
| 492 |
+
"""
|
| 493 |
+
Format a single search result based on the search type.
|
| 494 |
+
Returns a list of strings to be joined with newlines.
|
| 495 |
+
"""
|
| 496 |
+
lines = []
|
| 497 |
+
|
| 498 |
+
if search_type == "text":
|
| 499 |
+
title = result.get("title", "").strip()
|
| 500 |
+
url = result.get("href", "").strip()
|
| 501 |
+
snippet = result.get("body", "").strip()
|
| 502 |
+
date = _extract_date_from_snippet(snippet)
|
| 503 |
+
|
| 504 |
+
lines.append(f"{index}. {title}")
|
| 505 |
+
lines.append(f" URL: {url}")
|
| 506 |
+
if snippet:
|
| 507 |
+
lines.append(f" Summary: {snippet}")
|
| 508 |
+
if date:
|
| 509 |
+
lines.append(f" Date: {date}")
|
| 510 |
+
|
| 511 |
+
elif search_type == "news":
|
| 512 |
+
title = result.get("title", "").strip()
|
| 513 |
+
url = result.get("url", "").strip()
|
| 514 |
+
body = result.get("body", "").strip()
|
| 515 |
+
date = result.get("date", "").strip()
|
| 516 |
+
source = result.get("source", "").strip()
|
| 517 |
+
|
| 518 |
+
lines.append(f"{index}. {title}")
|
| 519 |
+
lines.append(f" URL: {url}")
|
| 520 |
+
if source:
|
| 521 |
+
lines.append(f" Source: {source}")
|
| 522 |
+
if date:
|
| 523 |
+
lines.append(f" Date: {date}")
|
| 524 |
+
if body:
|
| 525 |
+
lines.append(f" Summary: {body}")
|
| 526 |
+
|
| 527 |
+
elif search_type == "images":
|
| 528 |
+
title = result.get("title", "").strip()
|
| 529 |
+
image_url = result.get("image", "").strip()
|
| 530 |
+
source_url = result.get("url", "").strip()
|
| 531 |
+
source = result.get("source", "").strip()
|
| 532 |
+
width = result.get("width", "")
|
| 533 |
+
height = result.get("height", "")
|
| 534 |
+
|
| 535 |
+
lines.append(f"{index}. {title}")
|
| 536 |
+
lines.append(f" Image: {image_url}")
|
| 537 |
+
lines.append(f" Source: {source_url}")
|
| 538 |
+
if source:
|
| 539 |
+
lines.append(f" Publisher: {source}")
|
| 540 |
+
if width and height:
|
| 541 |
+
lines.append(f" Dimensions: {width}x{height}")
|
| 542 |
+
|
| 543 |
+
elif search_type == "videos":
|
| 544 |
+
title = result.get("title", "").strip()
|
| 545 |
+
description = result.get("description", "").strip()
|
| 546 |
+
duration = result.get("duration", "").strip()
|
| 547 |
+
published = result.get("published", "").strip()
|
| 548 |
+
uploader = result.get("uploader", "").strip()
|
| 549 |
+
embed_url = result.get("embed_url", "").strip()
|
| 550 |
+
|
| 551 |
+
lines.append(f"{index}. {title}")
|
| 552 |
+
if embed_url:
|
| 553 |
+
lines.append(f" Video: {embed_url}")
|
| 554 |
+
if uploader:
|
| 555 |
+
lines.append(f" Uploader: {uploader}")
|
| 556 |
+
if duration:
|
| 557 |
+
lines.append(f" Duration: {duration}")
|
| 558 |
+
if published:
|
| 559 |
+
lines.append(f" Published: {published}")
|
| 560 |
+
if description:
|
| 561 |
+
lines.append(f" Description: {description}")
|
| 562 |
+
|
| 563 |
+
elif search_type == "books":
|
| 564 |
+
title = result.get("title", "").strip()
|
| 565 |
+
url = result.get("url", "").strip()
|
| 566 |
+
body = result.get("body", "").strip()
|
| 567 |
+
|
| 568 |
+
lines.append(f"{index}. {title}")
|
| 569 |
+
lines.append(f" URL: {url}")
|
| 570 |
+
if body:
|
| 571 |
+
lines.append(f" Description: {body}")
|
| 572 |
+
|
| 573 |
+
return lines
|
| 574 |
+
|
| 575 |
+
|
| 576 |
def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
| 577 |
query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
|
| 578 |
max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
|
| 579 |
page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
|
| 580 |
+
search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
|
| 581 |
) -> str:
|
| 582 |
"""
|
| 583 |
+
Run a DuckDuckGo search and return formatted results with support for multiple content types.
|
| 584 |
|
| 585 |
Args:
|
| 586 |
query (str): The search query string. Supports operators like site:, quotes for exact matching,
|
|
|
|
| 592 |
- Exclude terms: "cats -dogs"
|
| 593 |
max_results (int): Number of results to return per page (1–20). Default: 5.
|
| 594 |
page (int): Page number for pagination (1-based). Default: 1.
|
| 595 |
+
search_type (str): Type of search to perform:
|
| 596 |
+
- "text": Web pages (default)
|
| 597 |
+
- "news": News articles with dates and sources
|
| 598 |
+
- "images": Image results with dimensions and sources
|
| 599 |
+
- "videos": Video results with duration and upload info
|
| 600 |
+
- "books": Book search results
|
| 601 |
|
| 602 |
Returns:
|
| 603 |
+
str: Search results formatted appropriately for the search type, with pagination info.
|
|
|
|
| 604 |
"""
|
| 605 |
+
_log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page, search_type=search_type)
|
| 606 |
if not query or not query.strip():
|
| 607 |
result = "No search query provided. Please enter a search term."
|
| 608 |
_log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
|
|
|
|
| 611 |
# Validate parameters
|
| 612 |
max_results = max(1, min(20, max_results))
|
| 613 |
page = max(1, page)
|
| 614 |
+
valid_types = ["text", "news", "images", "videos", "books"]
|
| 615 |
+
if search_type not in valid_types:
|
| 616 |
+
search_type = "text"
|
| 617 |
|
| 618 |
# Calculate offset for pagination
|
| 619 |
offset = (page - 1) * max_results
|
|
|
|
| 623 |
# Apply rate limiting to avoid being blocked
|
| 624 |
_search_rate_limiter.acquire()
|
| 625 |
|
| 626 |
+
# Perform search with timeout handling based on search type
|
|
|
|
| 627 |
with DDGS() as ddgs:
|
| 628 |
+
if search_type == "text":
|
| 629 |
+
raw_gen = ddgs.text(query, max_results=total_needed + 10)
|
| 630 |
+
elif search_type == "news":
|
| 631 |
+
raw_gen = ddgs.news(query, max_results=total_needed + 10)
|
| 632 |
+
elif search_type == "images":
|
| 633 |
+
raw_gen = ddgs.images(query, max_results=total_needed + 10)
|
| 634 |
+
elif search_type == "videos":
|
| 635 |
+
raw_gen = ddgs.videos(query, max_results=total_needed + 10)
|
| 636 |
+
elif search_type == "books":
|
| 637 |
+
raw_gen = ddgs.books(query, max_results=total_needed + 10)
|
| 638 |
+
|
| 639 |
raw = list(raw_gen)
|
| 640 |
|
| 641 |
except Exception as e:
|
|
|
|
| 651 |
return result
|
| 652 |
|
| 653 |
if not raw:
|
| 654 |
+
result = f"No {search_type} results found for query: {query}"
|
| 655 |
_log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
|
| 656 |
return result
|
| 657 |
|
|
|
|
| 659 |
paginated_results = raw[offset:offset + max_results]
|
| 660 |
|
| 661 |
if not paginated_results:
|
| 662 |
+
result = f"No {search_type} results found on page {page} for query: {query}. Try page 1 or reduce page number."
|
| 663 |
_log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
|
| 664 |
return result
|
| 665 |
|
| 666 |
+
# Format results based on search type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
total_available = len(raw)
|
| 668 |
start_num = offset + 1
|
| 669 |
+
end_num = offset + len(paginated_results)
|
| 670 |
|
| 671 |
+
lines = [f"{search_type.title()} search results for: {query}"]
|
| 672 |
lines.append(f"Page {page} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
|
| 673 |
|
| 674 |
+
for i, result in enumerate(paginated_results, start_num):
|
| 675 |
+
result_lines = _format_search_result(result, search_type, i)
|
| 676 |
+
lines.extend(result_lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
lines.append("") # Empty line between results
|
| 678 |
|
| 679 |
# Add pagination hint
|
|
|
|
| 681 |
lines.append(f"💡 More results available - use page={page + 1} to see next {max_results} results")
|
| 682 |
|
| 683 |
result = "\n".join(lines)
|
| 684 |
+
_log_call_end("Search_DuckDuckGo", f"type={search_type} page={page} results={len(paginated_results)} chars={len(result)}")
|
| 685 |
return result
|
| 686 |
|
| 687 |
|
|
|
|
| 1211 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
| 1212 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
| 1213 |
gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
|
| 1214 |
+
gr.Radio(
|
| 1215 |
+
label="Search Type",
|
| 1216 |
+
choices=["text", "news", "images", "videos", "books"],
|
| 1217 |
+
value="text",
|
| 1218 |
+
info="Type of content to search for"
|
| 1219 |
+
),
|
| 1220 |
],
|
| 1221 |
outputs=gr.Textbox(label="Search Results", interactive=False),
|
| 1222 |
title="DuckDuckGo Search",
|
| 1223 |
description=(
|
| 1224 |
+
"<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and pagination. Supports text, news, images, videos, and books.</div>"
|
| 1225 |
),
|
| 1226 |
api_description=(
|
| 1227 |
+
"Run a DuckDuckGo search with support for multiple content types and return formatted results. "
|
| 1228 |
"Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
|
| 1229 |
"OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
|
| 1230 |
"'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
|
| 1231 |
+
"Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination), "
|
| 1232 |
+
"search_type (str: text/news/images/videos/books). "
|
| 1233 |
+
"Returns appropriately formatted results with metadata and pagination hints for each content type."
|
| 1234 |
),
|
| 1235 |
flagging_mode="never",
|
| 1236 |
submit_btn="Search",
|