Spaces:
Running
Running
New TTS: OpenAudio S1 Mini (by Fish Audio)
Browse files
- app/models.py +32 -2
app/models.py
CHANGED
|
@@ -35,7 +35,6 @@ AVAILABLE_MODELS = {
|
|
| 35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 37 |
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
| 38 |
-
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
| 39 |
|
| 40 |
# E2 & F5 TTS
|
| 41 |
# F5 model
|
|
@@ -119,7 +118,11 @@ AVAILABLE_MODELS = {
|
|
| 119 |
# Chatterbox
|
| 120 |
'ResembleAI/Chatterbox': 'ResembleAI/Chatterbox',
|
| 121 |
|
|
|
|
|
|
|
|
|
|
| 122 |
# HF TTS w issues
|
|
|
|
| 123 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
| 124 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
| 125 |
# 'amphion/Text-to-Speech': '/predict#0', # disabled also on original HF space due to poor ratings
|
|
@@ -304,6 +307,16 @@ HF_SPACES = {
|
|
| 304 |
# 'emoji': '😷',
|
| 305 |
},
|
| 306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
# F5 TTS
|
| 308 |
'mrfakename/E2-F5-TTS': {
|
| 309 |
'name': 'F5 TTS',
|
|
@@ -687,6 +700,23 @@ OVERRIDE_INPUTS = {
|
|
| 687 |
'use_memory_cache': "never",
|
| 688 |
},
|
| 689 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 690 |
# F5
|
| 691 |
'mrfakename/E2-F5-TTS': {
|
| 692 |
'ref_audio_input': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
|
|
@@ -966,7 +996,7 @@ closed_source = [
|
|
| 966 |
]
|
| 967 |
|
| 968 |
# top five models in order to always have one of them picked and scrutinized
|
| 969 |
-
top_five = ['PHBJT/multi_parler_tts', '
|
| 970 |
|
| 971 |
# prioritize low vote models
|
| 972 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|
|
|
|
| 35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 37 |
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
|
|
|
| 38 |
|
| 39 |
# E2 & F5 TTS
|
| 40 |
# F5 model
|
|
|
|
| 118 |
# Chatterbox
|
| 119 |
'ResembleAI/Chatterbox': 'ResembleAI/Chatterbox',
|
| 120 |
|
| 121 |
+
# OpenAudio S1 (Fish Audio)
|
| 122 |
+
'fishaudio/openaudio-s1-mini': 'fishaudio/openaudio-s1-mini',
|
| 123 |
+
|
| 124 |
# HF TTS w issues
|
| 125 |
+
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Discontinued for OpenAudio S1
|
| 126 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
| 127 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
| 128 |
# 'amphion/Text-to-Speech': '/predict#0', # disabled also on original HF space due to poor ratings
|
|
|
|
| 307 |
# 'emoji': '😷',
|
| 308 |
},
|
| 309 |
|
| 310 |
+
# OpenAudio S1 (Fish Audio)
|
| 311 |
+
'fishaudio/openaudio-s1-mini': {
|
| 312 |
+
'name': 'OpenAudio S1 Mini',
|
| 313 |
+
'function': '/partial',
|
| 314 |
+
'text_param_index': 'text',
|
| 315 |
+
'return_audio_index': 0,
|
| 316 |
+
'series': 'Fish Speech',
|
| 317 |
+
# 'emoji': '😷',
|
| 318 |
+
},
|
| 319 |
+
|
| 320 |
# F5 TTS
|
| 321 |
'mrfakename/E2-F5-TTS': {
|
| 322 |
'name': 'F5 TTS',
|
|
|
|
| 700 |
'use_memory_cache': "never",
|
| 701 |
},
|
| 702 |
|
| 703 |
+
# OpenAudio S1 (Fish Audio)
|
| 704 |
+
'fishaudio/openaudio-s1-mini': {
|
| 705 |
+
# 'reference_id': "Hello!!", # voice id string - https://fish.audio/discovery/
|
| 706 |
+
# 'reference_audio': None,
|
| 707 |
+
# 'reference_text': None,
|
| 708 |
+
'reference_audio': DEFAULT_VOICE_SAMPLE,
|
| 709 |
+
'reference_text': DEFAULT_VOICE_TRANSCRIPT,
|
| 710 |
+
'max_new_tokens': 0,
|
| 711 |
+
'chunk_length': 0,
|
| 712 |
+
'top_p': 0.9,
|
| 713 |
+
'repetition_penalty': 1.1,
|
| 714 |
+
'temperature': 0.9,
|
| 715 |
+
'seed': 0,
|
| 716 |
+
'use_memory_cache': "on",
|
| 717 |
+
# 'emoji': '😷',
|
| 718 |
+
},
|
| 719 |
+
|
| 720 |
# F5
|
| 721 |
'mrfakename/E2-F5-TTS': {
|
| 722 |
'ref_audio_input': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
|
|
|
|
| 996 |
]
|
| 997 |
|
| 998 |
# top five models in order to always have one of them picked and scrutinized
|
| 999 |
+
top_five = ['PHBJT/multi_parler_tts', 'fishaudio/openaudio-s1-mini', 'ResembleAI/Chatterbox']
|
| 1000 |
|
| 1001 |
# prioritize low vote models
|
| 1002 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|