Update app.py
Browse files
app.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
| 1 |
# File: main/app.py
|
| 2 |
-
# Purpose: One Space that offers
|
| 3 |
# 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
|
| 4 |
-
# 2)
|
| 5 |
-
# 3)
|
| 6 |
-
# 4)
|
| 7 |
-
# 5) Generate Sitemap — LIMITED: grouped internal/external links with an optional per-domain cap (and a .md download)
|
| 8 |
|
| 9 |
from __future__ import annotations
|
| 10 |
|
|
@@ -12,14 +11,13 @@ import re
|
|
| 12 |
import json
|
| 13 |
import sys
|
| 14 |
from io import StringIO
|
| 15 |
-
from typing import List, Dict,
|
| 16 |
|
| 17 |
import gradio as gr
|
| 18 |
import requests
|
| 19 |
from bs4 import BeautifulSoup
|
| 20 |
from readability import Document
|
| 21 |
from urllib.parse import urljoin, urldefrag, urlparse
|
| 22 |
-
from langchain_community.tools import DuckDuckGoSearchResults
|
| 23 |
from duckduckgo_search import DDGS
|
| 24 |
|
| 25 |
|
|
@@ -320,53 +318,16 @@ def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
|
|
| 320 |
return md or "No content could be extracted."
|
| 321 |
|
| 322 |
|
| 323 |
-
#
|
| 324 |
-
#
|
| 325 |
-
#
|
| 326 |
-
|
| 327 |
-
def Search_Structured( # <-- MCP tool #3 (Structured DDG)
|
| 328 |
-
input_query: str,
|
| 329 |
-
max_results: int = 5,
|
| 330 |
-
) -> List[Dict[Literal["snippet", "title", "link"], str]]:
|
| 331 |
-
"""
|
| 332 |
-
Run a DuckDuckGo search and return structured results as a list of dictionaries.
|
| 333 |
-
(Layman's terms: search DDG and get clean JSON objects.)
|
| 334 |
-
"""
|
| 335 |
-
if not input_query or not input_query.strip():
|
| 336 |
-
return []
|
| 337 |
-
|
| 338 |
-
# Create the search tool (LangChain community wrapper)
|
| 339 |
-
search = DuckDuckGoSearchResults(output_format="list", num_results=max_results)
|
| 340 |
-
|
| 341 |
-
# Run the search and return results as a list of dicts
|
| 342 |
-
results = search.invoke(input_query)
|
| 343 |
-
return results
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
# ========================================
|
| 347 |
-
# Unstructured DDG: raw list into Textbox
|
| 348 |
-
# ========================================
|
| 349 |
-
|
| 350 |
-
def Search_Raw( # <-- MCP tool #4 (Unstructured DDG)
|
| 351 |
-
query: str,
|
| 352 |
-
) -> list[dict]:
|
| 353 |
-
"""
|
| 354 |
-
Run a DuckDuckGo search using the native `duckduckgo_search` client and return the
|
| 355 |
-
raw Python list of dictionaries from the library.
|
| 356 |
-
(Layman's terms: search DDG and show exactly what the library returns.)
|
| 357 |
-
"""
|
| 358 |
-
if not query or not query.strip():
|
| 359 |
-
return []
|
| 360 |
-
with DDGS() as ddgs:
|
| 361 |
-
results = ddgs.text(query, max_results=5)
|
| 362 |
-
return results
|
| 363 |
|
| 364 |
|
| 365 |
# ============================================
|
| 366 |
# Concise DDG: ultra-succinct JSONL for tokens
|
| 367 |
# ============================================
|
| 368 |
|
| 369 |
-
def
|
| 370 |
query: str,
|
| 371 |
max_results: int = 5,
|
| 372 |
include_snippets: bool = False,
|
|
@@ -595,7 +556,7 @@ fetch_interface = gr.Interface(
|
|
| 595 |
|
| 596 |
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
| 597 |
concise_interface = gr.Interface(
|
| 598 |
-
fn=
|
| 599 |
inputs=[
|
| 600 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
| 601 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
|
@@ -605,7 +566,7 @@ concise_interface = gr.Interface(
|
|
| 605 |
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
| 606 |
],
|
| 607 |
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
| 608 |
-
title="DuckDuckGo Search
|
| 609 |
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
| 610 |
api_description=(
|
| 611 |
"Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
|
|
@@ -617,39 +578,7 @@ concise_interface = gr.Interface(
|
|
| 617 |
submit_btn="Search",
|
| 618 |
)
|
| 619 |
|
| 620 |
-
|
| 621 |
-
websearch_interface = gr.Interface(
|
| 622 |
-
fn=Search_Structured, # connect the function to the UI
|
| 623 |
-
inputs=[
|
| 624 |
-
gr.Textbox(value="", label="Search query", placeholder="site:example.com interesting topic"),
|
| 625 |
-
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
| 626 |
-
],
|
| 627 |
-
outputs=gr.JSON(label="Search results"),
|
| 628 |
-
title="DuckDuckGo Search (Structured)",
|
| 629 |
-
description="Search the web using DuckDuckGo; returns snippet, title, and link.",
|
| 630 |
-
api_description=(
|
| 631 |
-
"Run a DuckDuckGo web search and return a list of objects with keys: "
|
| 632 |
-
"snippet, title, and link. Configure the number of results."
|
| 633 |
-
),
|
| 634 |
-
allow_flagging="never",
|
| 635 |
-
theme="Nymbo/Nymbo_Theme",
|
| 636 |
-
)
|
| 637 |
-
|
| 638 |
-
# --- Unstructured DDG tab (matches your separate app's output) ---
|
| 639 |
-
unstructured_interface = gr.Interface(
|
| 640 |
-
fn=Search_Raw,
|
| 641 |
-
inputs=gr.Textbox(label="Enter Search Query"),
|
| 642 |
-
outputs=gr.Textbox(label="Results", interactive=False),
|
| 643 |
-
title="DuckDuckGo Search (Raw)",
|
| 644 |
-
description="Returns the raw list of results (list[dict]) shown as text.",
|
| 645 |
-
api_description=(
|
| 646 |
-
"Run DuckDuckGo via the native client and return the raw list[dict] as "
|
| 647 |
-
"provided by duckduckgo_search (fields like title, href/link, body/snippet)."
|
| 648 |
-
),
|
| 649 |
-
allow_flagging="never",
|
| 650 |
-
theme="Nymbo/Nymbo_Theme",
|
| 651 |
-
submit_btn="Search",
|
| 652 |
-
)
|
| 653 |
|
| 654 |
# --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
|
| 655 |
sitemap_interface = gr.Interface(
|
|
@@ -692,12 +621,10 @@ code_interface = gr.Interface(
|
|
| 692 |
|
| 693 |
# --- Combine all into a single app with tabs ---
|
| 694 |
demo = gr.TabbedInterface(
|
| 695 |
-
interface_list=[fetch_interface, concise_interface,
|
| 696 |
tab_names=[
|
| 697 |
"Fetch Webpage",
|
| 698 |
-
"DuckDuckGo Search
|
| 699 |
-
"DuckDuckGo Search (Structured)",
|
| 700 |
-
"DuckDuckGo Search (Raw)",
|
| 701 |
"Generate Sitemap",
|
| 702 |
"Python Code Executor",
|
| 703 |
],
|
|
|
|
| 1 |
# File: main/app.py
|
| 2 |
+
# Purpose: One Space that offers four tools/tabs:
|
| 3 |
# 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
|
| 4 |
+
# 2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
|
| 5 |
+
# 3) Generate Sitemap — grouped internal/external links with an optional per-domain cap
|
| 6 |
+
# 4) Python Code Executor — run Python code and capture stdout/errors
|
|
|
|
| 7 |
|
| 8 |
from __future__ import annotations
|
| 9 |
|
|
|
|
| 11 |
import json
|
| 12 |
import sys
|
| 13 |
from io import StringIO
|
| 14 |
+
from typing import List, Dict, Tuple
|
| 15 |
|
| 16 |
import gradio as gr
|
| 17 |
import requests
|
| 18 |
from bs4 import BeautifulSoup
|
| 19 |
from readability import Document
|
| 20 |
from urllib.parse import urljoin, urldefrag, urlparse
|
|
|
|
| 21 |
from duckduckgo_search import DDGS
|
| 22 |
|
| 23 |
|
|
|
|
| 318 |
return md or "No content could be extracted."
|
| 319 |
|
| 320 |
|
| 321 |
+
# ===============================
|
| 322 |
+
# DuckDuckGo Search (JSONL lines)
|
| 323 |
+
# ===============================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
|
| 325 |
|
| 326 |
# ============================================
|
| 327 |
# Concise DDG: ultra-succinct JSONL for tokens
|
| 328 |
# ============================================
|
| 329 |
|
| 330 |
+
def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
| 331 |
query: str,
|
| 332 |
max_results: int = 5,
|
| 333 |
include_snippets: bool = False,
|
|
|
|
| 556 |
|
| 557 |
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
| 558 |
concise_interface = gr.Interface(
|
| 559 |
+
fn=Search_DuckDuckGo,
|
| 560 |
inputs=[
|
| 561 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
| 562 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
|
|
|
| 566 |
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
| 567 |
],
|
| 568 |
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
| 569 |
+
title="DuckDuckGo Search",
|
| 570 |
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
| 571 |
api_description=(
|
| 572 |
"Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
|
|
|
|
| 578 |
submit_btn="Search",
|
| 579 |
)
|
| 580 |
|
| 581 |
+
## Removed Structured and Raw tabs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
|
| 583 |
# --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
|
| 584 |
sitemap_interface = gr.Interface(
|
|
|
|
| 621 |
|
| 622 |
# --- Combine all into a single app with tabs ---
|
| 623 |
demo = gr.TabbedInterface(
|
| 624 |
+
interface_list=[fetch_interface, concise_interface, sitemap_interface, code_interface],
|
| 625 |
tab_names=[
|
| 626 |
"Fetch Webpage",
|
| 627 |
+
"DuckDuckGo Search",
|
|
|
|
|
|
|
| 628 |
"Generate Sitemap",
|
| 629 |
"Python Code Executor",
|
| 630 |
],
|