Shorten MCP tool descriptions to make context more concise
Browse files
app.py
CHANGED
|
@@ -206,17 +206,7 @@ def _extract_main_text(html: str) -> Tuple[str, BeautifulSoup]:
|
|
| 206 |
|
| 207 |
|
| 208 |
def _fullpage_markdown_from_soup(full_soup: BeautifulSoup, base_url: str) -> str:
|
| 209 |
-
|
| 210 |
-
Convert the page's main content (or body fallback) to Markdown, similar to
|
| 211 |
-
web-scraper's Content Scraper tool, but without any file download side-effects.
|
| 212 |
-
|
| 213 |
-
Steps:
|
| 214 |
-
- Remove noisy elements (script/style/nav/footer/header/aside)
|
| 215 |
-
- Prefer <main>, <article>, or common content containers; fallback to <body>
|
| 216 |
-
- Convert to Markdown with ATX headings
|
| 217 |
-
- Clean up excessive newlines, empty links, and whitespace
|
| 218 |
-
- Prepend a title header when available
|
| 219 |
-
"""
|
| 220 |
# Remove unwanted elements globally first
|
| 221 |
for element in full_soup.select("script, style, nav, footer, header, aside"):
|
| 222 |
element.decompose()
|
|
@@ -423,8 +413,7 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
|
| 423 |
max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
|
| 424 |
) -> str:
|
| 425 |
"""
|
| 426 |
-
Run a DuckDuckGo search
|
| 427 |
-
Always returns results in human-friendly format with snippets included.
|
| 428 |
|
| 429 |
Args:
|
| 430 |
query (str): The search query string. Supports operators like site:, quotes for exact matching,
|
|
@@ -670,41 +659,25 @@ def List_Kokoro_Voices() -> List[str]:
|
|
| 670 |
def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
| 671 |
text: Annotated[str, "The text to synthesize (English)."],
|
| 672 |
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.25,
|
| 673 |
-
voice: Annotated[str, "Voice identifier from 54 available options.
|
| 674 |
) -> Tuple[int, np.ndarray]:
|
| 675 |
"""
|
| 676 |
-
Synthesize speech from text using the Kokoro-82M model
|
| 677 |
|
| 678 |
This function returns raw audio suitable for a Gradio Audio component and is
|
| 679 |
also exposed as an MCP tool. It supports 54 different voices across multiple
|
| 680 |
languages and accents including American, British, European, Hindi, Italian,
|
| 681 |
Japanese, Portuguese, and Chinese speakers.
|
| 682 |
|
| 683 |
-
Enhanced for longer audio generation:
|
| 684 |
-
- Processes ALL text segments (not just the first one)
|
| 685 |
-
- Can generate audio of any length based on input text
|
| 686 |
-
- Concatenates multiple segments for seamless longer audio
|
| 687 |
-
|
| 688 |
-
Default behavior:
|
| 689 |
-
- Speed defaults to 1.25 (slightly brisk cadence) for clearer, snappier delivery.
|
| 690 |
-
- Voice defaults to "af_heart" (American Female, Heart voice)
|
| 691 |
-
|
| 692 |
Args:
|
| 693 |
text (str): The text to synthesize. Works best with English but supports multiple languages.
|
| 694 |
speed (float): Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
|
| 695 |
-
voice (str): Voice identifier from 54 available options.
|
| 696 |
|
| 697 |
Returns:
|
| 698 |
A tuple of (sample_rate_hz, audio_waveform) where:
|
| 699 |
- sample_rate_hz: int sample rate in Hz (24_000)
|
| 700 |
- audio_waveform: numpy.ndarray float32 mono waveform in range [-1, 1]
|
| 701 |
-
|
| 702 |
-
Notes:
|
| 703 |
-
- Requires the 'kokoro' package (>=0.9.4). If unavailable, an error is raised.
|
| 704 |
-
- Runs on CUDA if available; otherwise CPU.
|
| 705 |
-
- Supports 54 voices across 9 language/accent categories.
|
| 706 |
-
- Can generate audio of any length - no 30 second limit!
|
| 707 |
-
- Use List_Kokoro_Voices() MCP tool to discover all available voice options.
|
| 708 |
"""
|
| 709 |
_log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
|
| 710 |
if not text or not text.strip():
|
|
@@ -793,11 +766,7 @@ fetch_interface = gr.Interface(
|
|
| 793 |
),
|
| 794 |
api_description=(
|
| 795 |
"Fetch a web page and return it converted to Markdown format with configurable length. "
|
| 796 |
-
"
|
| 797 |
-
"preserving headings, formatting, and structure while removing navigation, footers, scripts, "
|
| 798 |
-
"and other non-content elements. Parameters: url (str - absolute URL), verbosity (str - "
|
| 799 |
-
"Brief/Standard/Full controlling output length: Brief=1000 chars, Standard=3000 chars, Full=complete page). "
|
| 800 |
-
"Returns clean Markdown with page title as H1 header and preserved content hierarchy."
|
| 801 |
),
|
| 802 |
flagging_mode="never",
|
| 803 |
)
|
|
@@ -815,8 +784,7 @@ concise_interface = gr.Interface(
|
|
| 815 |
"<div style=\"text-align:center\">Enhanced web search with readable output format. Always includes snippets for better context and understanding.</div>"
|
| 816 |
),
|
| 817 |
api_description=(
|
| 818 |
-
"Run a DuckDuckGo search
|
| 819 |
-
"Always returns results in human-friendly format with snippets included for better context. "
|
| 820 |
"Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
|
| 821 |
"OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
|
| 822 |
"'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'."
|
|
@@ -902,11 +870,10 @@ kokoro_interface = gr.Interface(
|
|
| 902 |
"<div style=\"text-align:center\">Generate speech with Kokoro-82M using 54 different voices. Supports multiple languages and accents. Can generate audio of any length! Runs on CPU or CUDA if available.</div>"
|
| 903 |
),
|
| 904 |
api_description=(
|
| 905 |
-
"Synthesize speech from text using Kokoro-82M
|
| 906 |
-
"Supports unlimited text length by processing all segments. Voice examples: 'af_heart' (US female), '
|
| 907 |
-
"'
|
| 908 |
"Parameters: text (str), speed (float 0.5–2.0, default 1.25x), voice (str from 54 available options, default 'af_heart'). "
|
| 909 |
-
"Use List_Kokoro_Voices() to see all available voices. "
|
| 910 |
"Return the generated media to the user in this format ``"
|
| 911 |
),
|
| 912 |
flagging_mode="never",
|
|
@@ -935,14 +902,7 @@ def Generate_Image( # <-- MCP tool #5 (Generate Image)
|
|
| 935 |
height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
|
| 936 |
) -> Image.Image:
|
| 937 |
"""
|
| 938 |
-
Generate a single image from a text prompt using a Hugging Face model via
|
| 939 |
-
serverless Inference. Returns a PIL image. By default, the model is
|
| 940 |
-
black-forest-labs/FLUX.1-Krea-dev.
|
| 941 |
-
|
| 942 |
-
Notes (MCP):
|
| 943 |
-
- Per the latest Gradio MCP docs, images returned from tools are handled by the server and
|
| 944 |
-
converted to file URLs automatically for MCP clients. Ensure type hints and this docstring
|
| 945 |
-
"Args:" block are present so the tool schema is accurate.
|
| 946 |
|
| 947 |
Args:
|
| 948 |
prompt (str): Text description of the image to generate.
|
|
@@ -1034,9 +994,9 @@ image_generation_interface = gr.Interface(
|
|
| 1034 |
"Default model is FLUX.1-Krea</div>"
|
| 1035 |
),
|
| 1036 |
api_description=(
|
| 1037 |
-
"Generate a single image from a text prompt using a Hugging Face model
|
| 1038 |
"Supports creative prompts like 'a serene mountain landscape at sunset', 'portrait of a wise owl', "
|
| 1039 |
-
"'futuristic city with flying cars'. Default model: FLUX.1-Krea-dev
|
| 1040 |
"Parameters: prompt (str), model_id (str, creator/model-name), negative_prompt (str), steps (int, 1–100), "
|
| 1041 |
"cfg_scale (float, 1–20), sampler (str), seed (int, -1=random), width/height (int, 64–1216). "
|
| 1042 |
"Returns a PIL.Image. Return the generated media to the user in this format ``"
|
|
@@ -1095,11 +1055,7 @@ def Generate_Video( # <-- MCP tool #6 (Generate Video)
|
|
| 1095 |
duration: Annotated[float, "Target duration in seconds (provider/model dependent, commonly 2–6s)."] = 4.0,
|
| 1096 |
) -> str:
|
| 1097 |
"""
|
| 1098 |
-
Generate a short video from a text prompt using Hugging Face
|
| 1099 |
-
|
| 1100 |
-
This tool follows the latest MCP guidance for Gradio-based MCP servers: clear type hints and
|
| 1101 |
-
docstrings define the tool schema automatically. The returned file path will be converted to a file URL
|
| 1102 |
-
for MCP clients.
|
| 1103 |
|
| 1104 |
Args:
|
| 1105 |
prompt (str): Text description of the video to generate.
|
|
@@ -1228,7 +1184,7 @@ video_generation_interface = gr.Interface(
|
|
| 1228 |
"Default model is Wan2.2-T2V-A14B.</div>"
|
| 1229 |
),
|
| 1230 |
api_description=(
|
| 1231 |
-
"Generate a short video from a text prompt using a Hugging Face model
|
| 1232 |
"Create dynamic scenes like 'a red fox running through a snowy forest at sunrise', 'waves crashing on a rocky shore', "
|
| 1233 |
"'time-lapse of clouds moving across a blue sky'. Default model: Wan2.2-T2V-A14B (2-6 second videos). "
|
| 1234 |
"Parameters: prompt (str), model_id (str), negative_prompt (str), steps (int), cfg_scale (float), seed (int), "
|
|
|
|
| 206 |
|
| 207 |
|
| 208 |
def _fullpage_markdown_from_soup(full_soup: BeautifulSoup, base_url: str) -> str:
|
| 209 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
# Remove unwanted elements globally first
|
| 211 |
for element in full_soup.select("script, style, nav, footer, header, aside"):
|
| 212 |
element.decompose()
|
|
|
|
| 413 |
max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
|
| 414 |
) -> str:
|
| 415 |
"""
|
| 416 |
+
Run a DuckDuckGo search and return numbered results with URLs, titles, and summaries.
|
|
|
|
| 417 |
|
| 418 |
Args:
|
| 419 |
query (str): The search query string. Supports operators like site:, quotes for exact matching,
|
|
|
|
| 659 |
def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
| 660 |
text: Annotated[str, "The text to synthesize (English)."],
|
| 661 |
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.25,
|
| 662 |
+
voice: Annotated[str, "Voice identifier from 54 available options."] = "af_heart",
|
| 663 |
) -> Tuple[int, np.ndarray]:
|
| 664 |
"""
|
| 665 |
+
Synthesize speech from text using the Kokoro-82M TTS model.
|
| 666 |
|
| 667 |
This function returns raw audio suitable for a Gradio Audio component and is
|
| 668 |
also exposed as an MCP tool. It supports 54 different voices across multiple
|
| 669 |
languages and accents including American, British, European, Hindi, Italian,
|
| 670 |
Japanese, Portuguese, and Chinese speakers.
|
| 671 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 672 |
Args:
|
| 673 |
text (str): The text to synthesize. Works best with English but supports multiple languages.
|
| 674 |
speed (float): Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
|
| 675 |
+
voice (str): Voice identifier from 54 available options. Default: 'af_heart'.
|
| 676 |
|
| 677 |
Returns:
|
| 678 |
A tuple of (sample_rate_hz, audio_waveform) where:
|
| 679 |
- sample_rate_hz: int sample rate in Hz (24_000)
|
| 680 |
- audio_waveform: numpy.ndarray float32 mono waveform in range [-1, 1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
"""
|
| 682 |
_log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
|
| 683 |
if not text or not text.strip():
|
|
|
|
| 766 |
),
|
| 767 |
api_description=(
|
| 768 |
"Fetch a web page and return it converted to Markdown format with configurable length. "
|
| 769 |
+
"Parameters: url (str - absolute URL), verbosity (str - Brief/Standard/Full controlling output length: Brief=1000 chars, Standard=3000 chars, Full=complete page)."
|
|
|
|
|
|
|
|
|
|
|
|
|
| 770 |
),
|
| 771 |
flagging_mode="never",
|
| 772 |
)
|
|
|
|
| 784 |
"<div style=\"text-align:center\">Enhanced web search with readable output format. Always includes snippets for better context and understanding.</div>"
|
| 785 |
),
|
| 786 |
api_description=(
|
| 787 |
+
"Run a DuckDuckGo search and return numbered results with URLs, titles, and summaries. "
|
|
|
|
| 788 |
"Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
|
| 789 |
"OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
|
| 790 |
"'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'."
|
|
|
|
| 870 |
"<div style=\"text-align:center\">Generate speech with Kokoro-82M using 54 different voices. Supports multiple languages and accents. Can generate audio of any length! Runs on CPU or CUDA if available.</div>"
|
| 871 |
),
|
| 872 |
api_description=(
|
| 873 |
+
"Synthesize speech from text using Kokoro-82M TTS model. Returns (sample_rate, waveform) suitable for playback. "
|
| 874 |
+
"Supports unlimited text length by processing all segments. Voice examples: 'af_heart' (US female), 'am_onyx' (US male), "
|
| 875 |
+
"'bf_emma' (British female), 'af_sky' (US female), 'af_nicole' (US female), "
|
| 876 |
"Parameters: text (str), speed (float 0.5–2.0, default 1.25x), voice (str from 54 available options, default 'af_heart'). "
|
|
|
|
| 877 |
"Return the generated media to the user in this format ``"
|
| 878 |
),
|
| 879 |
flagging_mode="never",
|
|
|
|
| 902 |
height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
|
| 903 |
) -> Image.Image:
|
| 904 |
"""
|
| 905 |
+
Generate a single image from a text prompt using a Hugging Face model via serverless inference.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
|
| 907 |
Args:
|
| 908 |
prompt (str): Text description of the image to generate.
|
|
|
|
| 994 |
"Default model is FLUX.1-Krea</div>"
|
| 995 |
),
|
| 996 |
api_description=(
|
| 997 |
+
"Generate a single image from a text prompt using a Hugging Face model via serverless inference. "
|
| 998 |
"Supports creative prompts like 'a serene mountain landscape at sunset', 'portrait of a wise owl', "
|
| 999 |
+
"'futuristic city with flying cars'. Default model: FLUX.1-Krea-dev. "
|
| 1000 |
"Parameters: prompt (str), model_id (str, creator/model-name), negative_prompt (str), steps (int, 1–100), "
|
| 1001 |
"cfg_scale (float, 1–20), sampler (str), seed (int, -1=random), width/height (int, 64–1216). "
|
| 1002 |
"Returns a PIL.Image. Return the generated media to the user in this format ``"
|
|
|
|
| 1055 |
duration: Annotated[float, "Target duration in seconds (provider/model dependent, commonly 2–6s)."] = 4.0,
|
| 1056 |
) -> str:
|
| 1057 |
"""
|
| 1058 |
+
Generate a short video from a text prompt using a Hugging Face model via serverless inference.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1059 |
|
| 1060 |
Args:
|
| 1061 |
prompt (str): Text description of the video to generate.
|
|
|
|
| 1184 |
"Default model is Wan2.2-T2V-A14B.</div>"
|
| 1185 |
),
|
| 1186 |
api_description=(
|
| 1187 |
+
"Generate a short video from a text prompt using a Hugging Face model via serverless inference. "
|
| 1188 |
"Create dynamic scenes like 'a red fox running through a snowy forest at sunrise', 'waves crashing on a rocky shore', "
|
| 1189 |
"'time-lapse of clouds moving across a blue sky'. Default model: Wan2.2-T2V-A14B (2-6 second videos). "
|
| 1190 |
"Parameters: prompt (str), model_id (str), negative_prompt (str), steps (int), cfg_scale (float), seed (int), "
|