Spaces:

Nymbo
/

Tools

Running

App Files Files Community

Nymbo committed on Sep 15

Commit

1a73c50

verified ·

1 Parent(s): cacc654

adding new search types: News, Images, Videos, Books

Browse files

Files changed (1) hide show

app.py +130 -50

app.py CHANGED Viewed

@@ -488,13 +488,99 @@ def _extract_date_from_snippet(snippet: str) -> str:
     return ""
 def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
     max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
     page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
 ) -> str:
     """
-    Run a DuckDuckGo search and return numbered results with URLs, titles, snippets, and dates.
     Args:
         query (str): The search query string. Supports operators like site:, quotes for exact matching,
@@ -506,12 +592,17 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
                - Exclude terms: "cats -dogs"
         max_results (int): Number of results to return per page (1–20). Default: 5.
         page (int): Page number for pagination (1-based). Default: 1.
     Returns:
-        str: Search results in readable format with titles, URLs, snippets, and publication dates
-             when available, formatted as a numbered list with pagination info.
     """
-    _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page)
     if not query or not query.strip():
         result = "No search query provided. Please enter a search term."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
@@ -520,6 +611,9 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     # Validate parameters
     max_results = max(1, min(20, max_results))
     page = max(1, page)
     # Calculate offset for pagination
     offset = (page - 1) * max_results
@@ -529,10 +623,19 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
         # Apply rate limiting to avoid being blocked
         _search_rate_limiter.acquire()
-        # Perform search with timeout handling
-        # We need to get more results than needed for pagination
         with DDGS() as ddgs:
-            raw_gen = ddgs.text(query, max_results=total_needed + 10)  # Get extra for safety
             raw = list(raw_gen)
     except Exception as e:
@@ -548,7 +651,7 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
         return result
     if not raw:
-        result = f"No results found for query: {query}"
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result
@@ -556,51 +659,21 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     paginated_results = raw[offset:offset + max_results]
     if not paginated_results:
-        result = f"No results found on page {page} for query: {query}. Try page 1 or reduce page number."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result
-    results = []
-    for r in paginated_results:
-        title = (r.get("title") or "").strip()
-        url = (r.get("href") or r.get("link") or "").strip()
-        body = (r.get("body") or r.get("snippet") or "").strip()
-        if not url:
-            continue
-        # Extract date from snippet
-        date_found = _extract_date_from_snippet(body)
-        result_obj = {
-            "title": title or _domain_of(url),
-            "url": url,
-            "snippet": body,
-            "date": date_found
-        }
-        results.append(result_obj)
-    if not results:
-        result = f"No valid results found on page {page} for query: {query}"
-        _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
-        return result
-    # Format output in readable format with pagination info
     total_available = len(raw)
     start_num = offset + 1
-    end_num = offset + len(results)
-    lines = [f"Search results for: {query}"]
     lines.append(f"Page {page} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
-    for i, result in enumerate(results, start_num):
-        lines.append(f"{i}. {result['title']}")
-        lines.append(f"   URL: {result['url']}")
-        if result['snippet']:
-            lines.append(f"   Summary: {result['snippet']}")
-        if result['date']:
-            lines.append(f"   Date: {result['date']}")
         lines.append("")  # Empty line between results
     # Add pagination hint
@@ -608,7 +681,7 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
         lines.append(f"💡 More results available - use page={page + 1} to see next {max_results} results")
     result = "\n".join(lines)
-    _log_call_end("Search_DuckDuckGo", f"page={page} results={len(results)} chars={len(result)}")
     return result
@@ -1138,19 +1211,26 @@ concise_interface = gr.Interface(
         gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
         gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
         gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
     ],
     outputs=gr.Textbox(label="Search Results", interactive=False),
     title="DuckDuckGo Search",
     description=(
-        "<div style=\"text-align:center\">Web search with readable output format, date detection, and pagination support. Supports advanced search operators.</div>"
     ),
     api_description=(
-        "Run a DuckDuckGo search and return numbered results with URLs, titles, summaries, and publication dates when detectable. "
         "Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
         "OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
         "'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
-        "Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination). "
-        "Returns formatted results with date metadata and pagination hints for accessing more results."
     ),
     flagging_mode="never",
     submit_btn="Search",

     return ""
def _format_search_result(result: dict, search_type: str, index: int) -> list[str]:
    """
    Format a single search result based on the search type.

    Args:
        result (dict): One raw result record for the given search type.
        search_type (str): One of "text", "news", "images", "videos", "books".
        index (int): Number printed in front of the result title.

    Returns:
        list[str]: Lines to be joined with newlines. The list is empty when
        search_type is not one of the recognised values.
    """

    def field(*keys: str) -> str:
        # Return the first non-empty value among `keys` as stripped text.
        # A key may be absent, or present with None / a non-string value;
        # `dict.get(key, "")` only covers the "absent" case, so coerce here
        # instead of calling .strip() on whatever the record holds.
        for key in keys:
            text = str(result.get(key) or "").strip()
            if text:
                return text
        return ""

    def optional(label: str, value: str) -> list[str]:
        # Emit an indented "Label: value" line only when there is a value.
        return [f"   {label}: {value}"] if value else []

    header = f"{index}. {field('title')}"

    if search_type == "text":
        # "link" / "snippet" are accepted as alternate key names for the
        # result URL and summary.
        snippet = field("body", "snippet")
        return [
            header,
            f"   URL: {field('href', 'link')}",
            *optional("Summary", snippet),
            *optional("Date", _extract_date_from_snippet(snippet)),
        ]

    if search_type == "news":
        return [
            header,
            f"   URL: {field('url')}",
            *optional("Source", field("source")),
            *optional("Date", field("date")),
            *optional("Summary", field("body")),
        ]

    if search_type == "images":
        # Dimensions are interpolated as-is and only shown when both are set.
        width = result.get("width")
        height = result.get("height")
        lines = [
            header,
            f"   Image: {field('image')}",
            f"   Source: {field('url')}",
            *optional("Publisher", field("source")),
        ]
        if width and height:
            lines.append(f"   Dimensions: {width}x{height}")
        return lines

    if search_type == "videos":
        # Prefer the embeddable URL; fall back to the "content" URL so a
        # record without an embed link still shows somewhere to watch it.
        return [
            header,
            *optional("Video", field("embed_url", "content")),
            *optional("Uploader", field("uploader")),
            *optional("Duration", field("duration")),
            *optional("Published", field("published")),
            *optional("Description", field("description")),
        ]

    if search_type == "books":
        return [
            header,
            f"   URL: {field('url')}",
            *optional("Description", field("body")),
        ]

    return []
 def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
     max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
     page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
+    search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
 ) -> str:
     """
+    Run a DuckDuckGo search and return formatted results with support for multiple content types.
     Args:
         query (str): The search query string. Supports operators like site:, quotes for exact matching,
                - Exclude terms: "cats -dogs"
         max_results (int): Number of results to return per page (1–20). Default: 5.
         page (int): Page number for pagination (1-based). Default: 1.
+        search_type (str): Type of search to perform:
+               - "text": Web pages (default)
+               - "news": News articles with dates and sources
+               - "images": Image results with dimensions and sources
+               - "videos": Video results with duration and upload info
+               - "books": Book search results
     Returns:
+        str: Search results formatted appropriately for the search type, with pagination info.
     """
+    _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page, search_type=search_type)
     if not query or not query.strip():
         result = "No search query provided. Please enter a search term."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
     # Validate parameters
     max_results = max(1, min(20, max_results))
     page = max(1, page)
+    valid_types = ["text", "news", "images", "videos", "books"]
+    if search_type not in valid_types:
+        search_type = "text"
     # Calculate offset for pagination
     offset = (page - 1) * max_results
         # Apply rate limiting to avoid being blocked
         _search_rate_limiter.acquire()
+        # Perform search with timeout handling based on search type
         with DDGS() as ddgs:
+            if search_type == "text":
+                raw_gen = ddgs.text(query, max_results=total_needed + 10)
+            elif search_type == "news":
+                raw_gen = ddgs.news(query, max_results=total_needed + 10)
+            elif search_type == "images":
+                raw_gen = ddgs.images(query, max_results=total_needed + 10)
+            elif search_type == "videos":
+                raw_gen = ddgs.videos(query, max_results=total_needed + 10)
+            elif search_type == "books":
+                raw_gen = ddgs.books(query, max_results=total_needed + 10)
             raw = list(raw_gen)
     except Exception as e:
         return result
     if not raw:
+        result = f"No {search_type} results found for query: {query}"
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result
     paginated_results = raw[offset:offset + max_results]
     if not paginated_results:
+        result = f"No {search_type} results found on page {page} for query: {query}. Try page 1 or reduce page number."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result
+    # Format results based on search type
     total_available = len(raw)
     start_num = offset + 1
+    end_num = offset + len(paginated_results)
+    lines = [f"{search_type.title()} search results for: {query}"]
     lines.append(f"Page {page} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
+    for i, result in enumerate(paginated_results, start_num):
+        result_lines = _format_search_result(result, search_type, i)
+        lines.extend(result_lines)
         lines.append("")  # Empty line between results
     # Add pagination hint
         lines.append(f"💡 More results available - use page={page + 1} to see next {max_results} results")
     result = "\n".join(lines)
+    _log_call_end("Search_DuckDuckGo", f"type={search_type} page={page} results={len(paginated_results)} chars={len(result)}")
     return result
         gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
         gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
         gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
+        gr.Radio(
+            label="Search Type",
+            choices=["text", "news", "images", "videos", "books"],
+            value="text",
+            info="Type of content to search for"
+        ),
     ],
     outputs=gr.Textbox(label="Search Results", interactive=False),
     title="DuckDuckGo Search",
     description=(
+        "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and pagination. Supports text, news, images, videos, and books.</div>"
     ),
     api_description=(
+        "Run a DuckDuckGo search with support for multiple content types and return formatted results. "
         "Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
         "OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
         "'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
+        "Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination), "
+        "search_type (str: text/news/images/videos/books). "
+        "Returns appropriately formatted results with metadata and pagination hints for each content type."
     ),
     flagging_mode="never",
     submit_btn="Search",