Spaces:

Nymbo / Tools

Running

Nymbo committed on Aug 25

Commit

a455050

verified ·

1 Parent(s): 86c62a8

default speed for Kokoro is 1.25x

Files changed (1) hide show

app.py CHANGED Viewed

@@ -499,7 +499,7 @@ def _init_kokoro() -> None:
 def Generate_Speech(  # <-- MCP tool #4 (Generate Speech)
     text: Annotated[str, "The text to synthesize (English)."],
-    speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
     voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
 ) -> Tuple[int, np.ndarray]:
     """
@@ -510,9 +510,12 @@ def Generate_Speech(  # <-- MCP tool #4 (Generate Speech)
     tool is created for each function wired into your app; docstrings and type
     hints are used to describe the tool).
     Args:
         text: The text to synthesize (English).
-        speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed.
         voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
     Returns:
@@ -668,7 +671,7 @@ kokoro_interface = gr.Interface(
     fn=Generate_Speech,
     inputs=[
         gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
-        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed"),
         gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
     ],
     outputs=gr.Audio(label="Audio", type="numpy"),

 def Generate_Speech(  # <-- MCP tool #4 (Generate Speech)
     text: Annotated[str, "The text to synthesize (English)."],
+    speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.25,
     voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
 ) -> Tuple[int, np.ndarray]:
     """
     tool is created for each function wired into your app; docstrings and type
     hints are used to describe the tool).
+    Default behavior:
+        - Speed defaults to 1.25 (slightly brisk cadence) for clearer, snappier delivery.
     Args:
         text: The text to synthesize (English).
+        speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
         voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
     Returns:
     fn=Generate_Speech,
     inputs=[
         gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
+    gr.Slider(minimum=0.5, maximum=2.0, value=1.25, step=0.1, label="Speed"),
         gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
     ],
     outputs=gr.Audio(label="Audio", type="numpy"),