Update app.py
app.py (CHANGED)
@@ -13,23 +13,19 @@ import json
 import sys
 import os
 import random
-import asyncio
-import time
 from io import StringIO
 from typing import List, Dict, Tuple, Annotated

 import gradio as gr
 import requests
-import httpx
 from bs4 import BeautifulSoup
-from markdownify import markdownify as md
-from readability import Document
+from markdownify import markdownify as md
+from readability import Document
 from urllib.parse import urljoin, urldefrag, urlparse
 from ddgs import DDGS
 from PIL import Image
 from huggingface_hub import InferenceClient
+import time

 # Optional imports for Kokoro TTS (loaded lazily)
 import numpy as np
-from datetime import datetime, timedelta
-from dataclasses import dataclass
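Context for the import shuffle above: `Document` (readability-lxml) isolates the main article markup, and markdownify's `md` converts that cleaned HTML into Markdown. A minimal sketch of how the two compose, with an illustrative URL; this is not the app's own extraction code:

```python
# Illustrative readability + markdownify pipeline (not the app's code).
import requests
from markdownify import markdownify as md
from readability import Document

html = requests.get("https://example.com", timeout=15).text
doc = Document(html)
readable_html = doc.summary(html_partial=True)  # main-content HTML only
print(doc.short_title())
print(md(readable_html)[:200])                  # cleaned HTML -> Markdown
```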
@@ -44,94 +40,6 @@ except Exception: # pragma: no cover - optional dependency
     KPipeline = None  # type: ignore


-# ==============================
-# Rate Limiting and HTTP Utils
-# ==============================
-
-@dataclass
-class SearchResult:
-    title: str
-    link: str
-    snippet: str
-    position: int
-
-
-class RateLimiter:
-    """Rate limiter to prevent being blocked by services"""
-    def __init__(self, requests_per_minute: int = 30):
-        self.requests_per_minute = requests_per_minute
-        self.requests = []
-
-    async def acquire(self):
-        now = datetime.now()
-        # Remove requests older than 1 minute
-        self.requests = [
-            req for req in self.requests if now - req < timedelta(minutes=1)
-        ]
-
-        if len(self.requests) >= self.requests_per_minute:
-            # Wait until we can make another request
-            wait_time = 60 - (now - self.requests[0]).total_seconds()
-            if wait_time > 0:
-                await asyncio.sleep(wait_time)
-
-        self.requests.append(now)
-
-
-class ImprovedWebFetcher:
-    """Improved web fetcher with rate limiting and async support"""
-
-    def __init__(self):
-        self.rate_limiter = RateLimiter(requests_per_minute=20)
-        self.headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
-            "Accept-Language": "en-US,en;q=0.9",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-        }
-
-    async def fetch_async(self, url: str) -> Tuple[str, str, str]:
-        """
-        Fetch URL with rate limiting and proper error handling
-        Returns: (html_content, final_url, error_message)
-        """
-        await self.rate_limiter.acquire()
-
-        try:
-            async with httpx.AsyncClient() as client:
-                response = await client.get(
-                    url,
-                    headers=self.headers,
-                    follow_redirects=True,
-                    timeout=30.0,
-                )
-                response.raise_for_status()
-                return response.text, str(response.url), ""
-
-        except httpx.TimeoutException:
-            return "", "", f"Request timed out for URL: {url}"
-        except httpx.HTTPError as e:
-            return "", "", f"HTTP error occurred: {str(e)}"
-        except Exception as e:
-            return "", "", f"Unexpected error: {str(e)}"
-
-# Global instances
-_web_fetcher = ImprovedWebFetcher()
-_search_rate_limiter = RateLimiter(requests_per_minute=30)
-
-# Simple sync rate limiting for backwards compatibility
-_last_request_time = 0
-_min_request_interval = 2  # seconds between requests
-
-def _apply_rate_limit():
-    """Simple synchronous rate limiting"""
-    global _last_request_time
-    current_time = time.time()
-    elapsed = current_time - _last_request_time
-    if elapsed < _min_request_interval:
-        time.sleep(_min_request_interval - elapsed)
-    _last_request_time = time.time()
-
-
 # ==============================
 # Fetch: HTTP + extraction utils
 # ==============================
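The block removed above implemented a sliding-window limiter: keep timestamps of recent requests, discard any older than sixty seconds, and sleep until the oldest falls out of the window whenever the cap is hit. For reference, this is how a caller would have driven it; the function below is a hypothetical sketch, not code from the app:

```python
# Hypothetical driver for the removed RateLimiter/ImprovedWebFetcher pair.
import asyncio

async def fetch_many(urls, fetcher, limiter):
    results = []
    for url in urls:
        await limiter.acquire()   # sleeps only while the 60 s window is full
        results.append(await fetcher.fetch_async(url))
    return results
```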
@@ -139,18 +47,14 @@ def _apply_rate_limit():
 def _http_get(url: str) -> requests.Response:
     """
     Download the page politely with a short timeout and realistic headers.
-    Enhanced with better error handling, headers from ddg-search patterns, and rate limiting.
     (Layman's terms: grab the web page like a normal browser would, but quickly.)
     """
-    # Apply rate limiting to avoid being blocked
-    _apply_rate_limit()
-
     headers = {
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+        "User-Agent": "Mozilla/5.0 (compatible; WebMCP/1.0; +https://example.com)",
         "Accept-Language": "en-US,en;q=0.9",
         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
     }
-    return requests.get(url, headers=headers, timeout=…
+    return requests.get(url, headers=headers, timeout=15)


 def _normalize_whitespace(text: str) -> str:
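A usage note on the helper above: `requests.get` raises on its own for transport failures, `raise_for_status()` additionally converts HTTP 4xx/5xx responses into exceptions, and a single float `timeout` bounds the connect and read phases separately. A sketch, assuming the module context:

```python
# Sketch: calling the helper (assumes _http_get from this module is in scope).
resp = _http_get("https://example.com")  # illustrative URL
resp.raise_for_status()                  # HTTP 4xx/5xx -> requests.HTTPError
print(resp.status_code, resp.headers.get("Content-Type"))
```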
@@ -256,8 +160,8 @@ def _extract_main_text(html: str) -> Tuple[str, BeautifulSoup]:
     # Parse simplified HTML
     s = BeautifulSoup(readable_html, "lxml")

-    # Remove noisy tags
-    for sel in ["script", "style", "noscript", "iframe", "svg"…
+    # Remove noisy tags
+    for sel in ["script", "style", "noscript", "iframe", "svg"]:
         for tag in s.select(sel):
             tag.decompose()

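The loop fixed above strips boilerplate elements before text extraction; `decompose()` removes a tag and everything under it from the parse tree. A standalone illustration:

```python
# Standalone illustration of the noisy-tag removal.
from bs4 import BeautifulSoup

s = BeautifulSoup("<div><script>x()</script><p>kept</p></div>", "lxml")
for sel in ["script", "style", "noscript", "iframe", "svg"]:
    for tag in s.select(sel):
        tag.decompose()           # drop the tag and its subtree
print(s.get_text(strip=True))     # -> kept
```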
@@ -453,30 +357,17 @@ def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
     try:
         resp = _http_get(url)
         resp.raise_for_status()
-    except requests.exceptions.Timeout:
-        return f"Error: Request timed out while fetching {url}. Please try again or check if the website is accessible."
-    except requests.exceptions.ConnectionError:
-        return f"Error: Could not connect to {url}. Please check the URL and your internet connection."
-    except requests.exceptions.HTTPError as e:
-        return f"Error: HTTP {e.response.status_code} - {e.response.reason} when accessing {url}"
     except requests.exceptions.RequestException as e:
-        return f"…
-    except Exception as e:
-        return f"Error: An unexpected error occurred while fetching the webpage ({str(e)})"
+        return f"An error occurred: {e}"

     final_url = str(resp.url)
     ctype = resp.headers.get("Content-Type", "")
     if "html" not in ctype.lower():
-        return f"Unsupported content type for extraction: {ctype or 'unknown'}…
+        return f"Unsupported content type for extraction: {ctype or 'unknown'}"

-    # Decode to text
-
-
-    try:
-        html = resp.text
-    except UnicodeDecodeError:
-        # Fallback encoding handling
-        html = resp.content.decode('utf-8', errors='replace')
+    # Decode to text
+    resp.encoding = resp.encoding or resp.apparent_encoding
+    html = resp.text

     # Full-page soup for metadata (and potential Markdown conversion)
     full_soup = BeautifulSoup(html, "lxml")
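On the new decode step: `resp.text` decodes with the charset declared in the response headers when one is present; when nothing is declared, `apparent_encoding` guesses from the raw bytes (via charset_normalizer/chardet). The added line pins that guess onto the response before `.text` is read:

```python
# The decode idiom from this commit, in isolation (illustrative URL).
import requests

resp = requests.get("https://example.com")
# Keep the header-declared charset if present, otherwise fall back to
# detection over the body bytes before decoding.
resp.encoding = resp.encoding or resp.apparent_encoding
html = resp.text
```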
@@ -527,11 +418,10 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
     max_snippet_chars: Annotated[int, "Character cap applied to each snippet when included."] = 80,
     dedupe_domains: Annotated[bool, "If true, only the first result from each domain is kept."] = True,
     title_chars: Annotated[int, "Character cap applied to titles."] = 80,
-    output_format: Annotated[str, "Output format: 'jsonl' for compact JSON or 'readable' for LLM-friendly text."] = "jsonl",
 ) -> str:
     """
-    Run a DuckDuckGo search and return…
-
+    Run a DuckDuckGo search and return ultra-compact JSONL with short keys to
+    minimize tokens.

     Args:
         query: The search query (supports operators like site:, quotes, OR).
@@ -540,36 +430,24 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
         max_snippet_chars: Character cap applied to each snippet when included.
         dedupe_domains: If true, only the first result from each domain is kept.
         title_chars: Character cap applied to titles.
-        output_format: Output format: 'jsonl' for compact JSON or 'readable' for LLM-friendly text.

     Returns:
-        str:…
-
+        str: Newline-delimited JSON (JSONL). Each line has:
+             {"t": "title", "u": "url"[, "s": "snippet"]}
     """
     if not query or not query.strip():
-        return "…
-
-    # Apply rate limiting to avoid being blocked by DuckDuckGo
-    _apply_rate_limit()
+        return ""

     try:
         with DDGS() as ddgs:
             raw = ddgs.text(query, max_results=max_results)
     except Exception as e:
-        …
-        if output_format == "readable":
-            return f"Error: {error_msg}. This could be due to DuckDuckGo's bot detection or network issues. Please try rephrasing your search or try again in a few minutes."
-        return json.dumps({"error": error_msg}, ensure_ascii=False, separators=(",", ":"))
-
-    if not raw:
-        if output_format == "readable":
-            return f"No results found for query: {query}. This could be due to DuckDuckGo's bot detection or the query returned no matches. Please try rephrasing your search or try again in a few minutes."
-        return ""
+        return json.dumps({"error": str(e)[:120]}, ensure_ascii=False, separators=(",", ":"))

     seen_domains = set()
-    …
+    lines: List[str] = []

-    for …
+    for r in raw or []:
         title = _shorten((r.get("title") or "").strip(), title_chars)
         url = (r.get("href") or r.get("link") or "").strip()
         body = (r.get("body") or r.get("snippet") or "").strip()
@@ -583,35 +461,16 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
             continue
         seen_domains.add(dom)

-        …
-            obj["s"] = _shorten(body, max_snippet_chars)
-            results.append(json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
-
-    if output_format == "readable":
-        if not results:
-            return f"No results found for query: {query}"
-
-        output = [f"Found {len(results)} search results for: {query}\n"]
-        for result in results:
-            output.append(f"{result['position']}. {result['title']}")
-            output.append(f"   URL: {result['url']}")
-            if result['snippet']:
-                output.append(f"   Summary: {result['snippet']}")
-            output.append("")  # Empty line between results
-
-        return "\n".join(output).rstrip()
-    else:
-        # Return JSONL format (original behavior)
-        return "\n".join(results)
+        obj = {"t": title or _domain_of(url), "u": url}
+
+        if include_snippets and body:
+            obj["s"] = _shorten(body, max_snippet_chars)
+
+        # Emit most compact JSON possible (no spaces)
+        lines.append(json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
+
+    # Join as JSONL (each result on its own line)
+    return "\n".join(lines)


 # ======================================
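The simplified search tool now emits one compact JSON object per line, with `t` (title), `u` (url), and an optional `s` (snippet). A client parses it line by line; the sample lines below are invented:

```python
# Consuming the tool's JSONL output (sample lines are invented).
import json

jsonl = (
    '{"t":"Example Domain","u":"https://example.com"}\n'
    '{"t":"Python","u":"https://python.org","s":"Official site"}'
)
for line in jsonl.splitlines():
    if line:
        r = json.loads(line)
        print(r["t"], r["u"], r.get("s", ""))  # "s" may be absent
```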
@@ -890,7 +749,7 @@ fetch_interface = gr.Interface(
         "'Full-page Markdown (Content Scraper mode)' option to return the page "
         "converted to Markdown."
     ),
-    …
+    allow_flagging="never",
 )

 # --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
@@ -903,19 +762,18 @@ concise_interface = gr.Interface(
         gr.Slider(minimum=20, maximum=200, value=80, step=5, label="Max snippet chars"),
         gr.Checkbox(value=True, label="Dedupe by domain"),
         gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
-        gr.Dropdown(label="Output Format", choices=["jsonl", "readable"], value="jsonl", info="JSONL for compact output, Readable for LLM-friendly format"),
     ],
-    outputs=gr.Textbox(label="…
+    outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
     title="DuckDuckGo Search",
     description=(
-        "<div style=\"text-align:center\">…
+        "<div style=\"text-align:center\">Very concise web search to avoid unnecessary context. Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.</div>"
     ),
     api_description=(
-        "Run a DuckDuckGo search…
-        "…
-        "…
+        "Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
+        "t=title, u=url, optional s=snippet. Options control result count, "
+        "snippet inclusion and length, domain deduping, and title length."
     ),
-    …
+    allow_flagging="never",
     submit_btn="Search",
 )

@@ -937,7 +795,7 @@ code_interface = gr.Interface(
         "Returns:\n"
         "- string: Combined stdout produced by the code, or the exception text if execution failed."
     ),
-    …
+    allow_flagging="never",
 )

 CSS_STYLES = """
@@ -1002,7 +860,7 @@ kokoro_interface = gr.Interface(
         "Can generate audio of unlimited length by processing all text segments. "
         "Return the generated media to the user in this format ``"
     ),
-    …
+    allow_flagging="never",
 )

 # ==========================
@@ -1128,7 +986,7 @@ image_generation_interface = gr.Interface(
         "sampler (str, label only), seed (int, -1=random), width/height (int, 64–1216). Returns a PIL.Image. "
         "Return the generated media to the user in this format ``"
     ),
-    …
+    allow_flagging="never",
 )

 # ==========================
@@ -1305,7 +1163,7 @@ video_generation_interface = gr.Interface(
         "Parameters: prompt (str), model_id (str), negative_prompt (str), steps (int), cfg_scale (float), seed (int), "
         "width/height (int), fps (int), duration (float). Return the generated media to the user in this format ``"
     ),
-    …
+    allow_flagging="never",
 )

 # Build tabbed app; disable Image/Video tools if no HF token is present
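The trailing context line notes that the tabbed app disables the Image/Video tools when no HF token is present. The wiring itself sits outside these hunks; a plausible sketch of that gating, with the env-var name and list assembly assumed:

```python
# Assumed token-gating pattern (the commit's actual wiring is not shown here).
import os
import gradio as gr

interfaces = [fetch_interface, concise_interface, code_interface, kokoro_interface]
names = ["Fetch Webpage", "DuckDuckGo Search", "Code", "Kokoro TTS"]
if os.getenv("HF_TOKEN"):  # env-var name is an assumption
    interfaces += [image_generation_interface, video_generation_interface]
    names += ["Image Generation", "Video Generation"]

demo = gr.TabbedInterface(interfaces, names)
```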