default speed for Kokoro is 1.25x
Browse files
app.py
CHANGED
|
@@ -499,7 +499,7 @@ def _init_kokoro() -> None:
|
|
| 499 |
|
| 500 |
def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
| 501 |
text: Annotated[str, "The text to synthesize (English)."],
|
| 502 |
-
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
|
| 503 |
voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
|
| 504 |
) -> Tuple[int, np.ndarray]:
|
| 505 |
"""
|
|
@@ -510,9 +510,12 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
|
| 510 |
tool is created for each function wired into your app; docstrings and type
|
| 511 |
hints are used to describe the tool).
|
| 512 |
|
|
|
|
|
|
|
|
|
|
| 513 |
Args:
|
| 514 |
text: The text to synthesize (English).
|
| 515 |
-
speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed.
|
| 516 |
voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
|
| 517 |
|
| 518 |
Returns:
|
|
@@ -668,7 +671,7 @@ kokoro_interface = gr.Interface(
|
|
| 668 |
fn=Generate_Speech,
|
| 669 |
inputs=[
|
| 670 |
gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
|
| 671 |
-
|
| 672 |
gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
|
| 673 |
],
|
| 674 |
outputs=gr.Audio(label="Audio", type="numpy"),
|
|
|
|
| 499 |
|
| 500 |
def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
| 501 |
text: Annotated[str, "The text to synthesize (English)."],
|
| 502 |
+
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.25,
|
| 503 |
voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
|
| 504 |
) -> Tuple[int, np.ndarray]:
|
| 505 |
"""
|
|
|
|
| 510 |
tool is created for each function wired into your app; docstrings and type
|
| 511 |
hints are used to describe the tool).
|
| 512 |
|
| 513 |
+
Default behavior:
|
| 514 |
+
- Speed defaults to 1.25 (slightly brisk cadence) for clearer, snappier delivery.
|
| 515 |
+
|
| 516 |
Args:
|
| 517 |
text: The text to synthesize (English).
|
| 518 |
+
speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
|
| 519 |
voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
|
| 520 |
|
| 521 |
Returns:
|
|
|
|
| 671 |
fn=Generate_Speech,
|
| 672 |
inputs=[
|
| 673 |
gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
|
| 674 |
+
gr.Slider(minimum=0.5, maximum=2.0, value=1.25, step=0.1, label="Speed"),
|
| 675 |
gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
|
| 676 |
],
|
| 677 |
outputs=gr.Audio(label="Audio", type="numpy"),
|