Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -161,20 +161,46 @@ def generate_speech(text, voice, temperature, top_p, repetition_penalty, max_new
|
|
| 161 |
print(f"Error generating speech: {e}")
|
| 162 |
return None
|
| 163 |
|
| 164 |
-
# Examples for the UI
|
| 165 |
examples = [
|
| 166 |
-
["Hey there
|
| 167 |
-
["
|
| 168 |
-
["
|
| 169 |
-
["
|
| 170 |
-
["
|
| 171 |
-
["
|
| 172 |
-
["
|
| 173 |
-
["
|
| 174 |
]
|
| 175 |
|
| 176 |
-
# Available voices
|
| 177 |
-
VOICES = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
# Available Emotive Tags
|
| 180 |
EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
|
|
@@ -182,17 +208,21 @@ EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>
|
|
| 182 |
# Create Gradio interface
|
| 183 |
with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
|
| 184 |
gr.Markdown(f"""
|
| 185 |
-
#
|
| 186 |
VyvoTTS is a text-to-speech model by Vyvo team using LFM2 architecture, trained on multiple diverse open-source datasets.
|
| 187 |
Since some datasets may contain transcription errors or quality issues, output quality can vary.
|
| 188 |
Higher quality datasets typically produce better speech synthesis results.
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
## Tips for better prompts:
|
| 191 |
- Add paralinguistic elements like {", ".join(EMOTIVE_TAGS)} or `uhm` for more human-like speech.
|
| 192 |
- Longer text prompts generally work better than very short phrases
|
| 193 |
- Increasing `repetition_penalty` and `temperature` makes the model speak faster.
|
| 194 |
|
| 195 |
-
**Note:** Output quality may vary depending on the source dataset quality for each voice.
|
| 196 |
""")
|
| 197 |
with gr.Row():
|
| 198 |
with gr.Column(scale=3):
|
|
@@ -203,8 +233,8 @@ with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
|
|
| 203 |
)
|
| 204 |
voice = gr.Dropdown(
|
| 205 |
choices=VOICES,
|
| 206 |
-
value="
|
| 207 |
-
label="Voice"
|
| 208 |
)
|
| 209 |
|
| 210 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
|
| 161 |
print(f"Error generating speech: {e}")
|
| 162 |
return None
|
| 163 |
|
| 164 |
+
# Examples for the UI - using Genshin characters
|
| 165 |
examples = [
|
| 166 |
+
["Hey there! I am ready to help you on your adventure in Teyvat.", "Tighnari", 0.6, 0.95, 1.1, 1200],
|
| 167 |
+
["The wind brings new adventures and ancient secrets to discover.", "Kaeya", 0.7, 0.95, 1.1, 1200],
|
| 168 |
+
["Let me share the wisdom of the elements with you, traveler.", "Nahida", 0.6, 0.9, 1.2, 1200],
|
| 169 |
+
["Every journey begins with a single step forward into the unknown.", "Noelle", 0.65, 0.9, 1.1, 1200],
|
| 170 |
+
["The stars above guide us through even the darkest of nights.", "Furina", 0.7, 0.95, 1.1, 1200],
|
| 171 |
+
["Together we can explore the mysteries of this vast world.", "Lyney", 0.65, 0.9, 1.15, 1200],
|
| 172 |
+
["Knowledge is power, but wisdom is knowing how to use it.", "Alhaitham", 0.7, 0.95, 1.1, 1200],
|
| 173 |
+
["The beauty of Sumeru never fails to take my breath away.", "Collei", 0.6, 0.95, 1.1, 1200]
|
| 174 |
]
|
| 175 |
|
| 176 |
+
# Available voices - Genshin characters and others
|
| 177 |
+
VOICES = [
|
| 178 |
+
"Stephen_Fry",
|
| 179 |
+
"Tighnari",
|
| 180 |
+
"Thoma",
|
| 181 |
+
"Shikanoin_Heizou",
|
| 182 |
+
"Noelle",
|
| 183 |
+
"Ningguang",
|
| 184 |
+
"Nilou",
|
| 185 |
+
"Neuvillette",
|
| 186 |
+
"Navia",
|
| 187 |
+
"Nahida",
|
| 188 |
+
"Mualani",
|
| 189 |
+
"Lyney",
|
| 190 |
+
"Lynette",
|
| 191 |
+
"Layla",
|
| 192 |
+
"Kaveh",
|
| 193 |
+
"Kaeya",
|
| 194 |
+
"Furina",
|
| 195 |
+
"Dehya",
|
| 196 |
+
"Cyno",
|
| 197 |
+
"Collei",
|
| 198 |
+
"Beidou",
|
| 199 |
+
"Alhaitham",
|
| 200 |
+
"Arataki_Itto",
|
| 201 |
+
"Jenny_Voice",
|
| 202 |
+
"Optimus_Prime"
|
| 203 |
+
]
|
| 204 |
|
| 205 |
# Available Emotive Tags
|
| 206 |
EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
|
|
|
|
| 208 |
# Create Gradio interface
|
| 209 |
with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
|
| 210 |
gr.Markdown(f"""
|
| 211 |
+
# 🎮 VyvoTTS Multi-Speaker
|
| 212 |
VyvoTTS is a text-to-speech model by Vyvo team using LFM2 architecture, trained on multiple diverse open-source datasets.
|
| 213 |
Since some datasets may contain transcription errors or quality issues, output quality can vary.
|
| 214 |
Higher quality datasets typically produce better speech synthesis results.
|
| 215 |
|
| 216 |
+
**Available Character Voices:**
|
| 217 |
+
🌟 Genshin Impact: Tighnari, Thoma, Heizou, Noelle, Ningguang, Nilou, Neuvillette, Navia, Nahida, Mualani, Lyney, Lynette, Layla, Kaveh, Kaeya, Furina, Dehya, Cyno, Collei, Beidou, Alhaitham, Itto
|
| 218 |
+
🎭 Others: Stephen Fry, Jenny Voice, Optimus Prime
|
| 219 |
+
|
| 220 |
## Tips for better prompts:
|
| 221 |
- Add paralinguistic elements like {", ".join(EMOTIVE_TAGS)} or `uhm` for more human-like speech.
|
| 222 |
- Longer text prompts generally work better than very short phrases
|
| 223 |
- Increasing `repetition_penalty` and `temperature` makes the model speak faster.
|
| 224 |
|
| 225 |
+
**Note:** Output quality may vary depending on the source dataset quality for each character voice.
|
| 226 |
""")
|
| 227 |
with gr.Row():
|
| 228 |
with gr.Column(scale=3):
|
|
|
|
| 233 |
)
|
| 234 |
voice = gr.Dropdown(
|
| 235 |
choices=VOICES,
|
| 236 |
+
value="Tighnari",
|
| 237 |
+
label="Character Voice"
|
| 238 |
)
|
| 239 |
|
| 240 |
with gr.Accordion("Advanced Settings", open=False):
|