Spaces:
Running
Running
OpenAudio JSON None => 'None' fix; XTTS disabled
Browse files
- app/models.py +10 -12
- test_tts_xtts.py +5 -3
app/models.py
CHANGED
|
@@ -26,7 +26,7 @@ AVAILABLE_MODELS = {
|
|
| 26 |
# '<keyname>':'<Space URL>'
|
| 27 |
# gradio version that works with most spaces: 4.29
|
| 28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
| 29 |
-
'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
| 30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
@@ -146,7 +146,7 @@ HF_SPACES = {
|
|
| 146 |
'coqui/xtts': {
|
| 147 |
'name': 'XTTS v2',
|
| 148 |
'function': '/predict',
|
| 149 |
-
'text_param_index':
|
| 150 |
'return_audio_index': 0,
|
| 151 |
'series': 'XTTS',
|
| 152 |
},
|
|
@@ -186,7 +186,7 @@ HF_SPACES = {
|
|
| 186 |
'function': '/tts',
|
| 187 |
'text_param_index': 0,
|
| 188 |
'return_audio_index': 0,
|
| 189 |
-
'series': 'MetaVoice
|
| 190 |
'emoji': '😷', # broken space
|
| 191 |
},
|
| 192 |
|
|
@@ -304,7 +304,7 @@ HF_SPACES = {
|
|
| 304 |
'text_param_index': 'text',
|
| 305 |
'return_audio_index': 0,
|
| 306 |
'series': 'Fish Speech',
|
| 307 |
-
|
| 308 |
},
|
| 309 |
|
| 310 |
# OpenAudio S1 (Fish Audio)
|
|
@@ -599,7 +599,8 @@ OVERRIDE_INPUTS = {
|
|
| 599 |
# },
|
| 600 |
# tonyassi ZeroGPU space of XTTS:
|
| 601 |
'coqui/xtts': {
|
| 602 |
-
|
|
|
|
| 603 |
},
|
| 604 |
'collabora/WhisperSpeech': {
|
| 605 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
|
@@ -698,19 +699,16 @@ OVERRIDE_INPUTS = {
|
|
| 698 |
|
| 699 |
# OpenAudio S1 (Fish Audio)
|
| 700 |
'fishaudio/openaudio-s1-mini': {
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
'reference_audio': DEFAULT_VOICE_SAMPLE,
|
| 705 |
-
'reference_text': DEFAULT_VOICE_TRANSCRIPT,
|
| 706 |
'max_new_tokens': 0,
|
| 707 |
'chunk_length': 0,
|
| 708 |
'top_p': 0.9,
|
| 709 |
'repetition_penalty': 1.1,
|
| 710 |
'temperature': 0.9,
|
| 711 |
-
'seed':
|
| 712 |
'use_memory_cache': "on",
|
| 713 |
-
# 'emoji': '😷',
|
| 714 |
},
|
| 715 |
|
| 716 |
# F5
|
|
|
|
| 26 |
# '<keyname>':'<Space URL>'
|
| 27 |
# gradio version that works with most spaces: 4.29
|
| 28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
| 29 |
+
# 'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
| 30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
|
|
| 146 |
'coqui/xtts': {
|
| 147 |
'name': 'XTTS v2',
|
| 148 |
'function': '/predict',
|
| 149 |
+
'text_param_index': 0,
|
| 150 |
'return_audio_index': 0,
|
| 151 |
'series': 'XTTS',
|
| 152 |
},
|
|
|
|
| 186 |
'function': '/tts',
|
| 187 |
'text_param_index': 0,
|
| 188 |
'return_audio_index': 0,
|
| 189 |
+
'series': 'MetaVoice',
|
| 190 |
'emoji': '😷', # broken space
|
| 191 |
},
|
| 192 |
|
|
|
|
| 304 |
'text_param_index': 'text',
|
| 305 |
'return_audio_index': 0,
|
| 306 |
'series': 'Fish Speech',
|
| 307 |
+
'emoji': '😵', # redirects to OpenAudio
|
| 308 |
},
|
| 309 |
|
| 310 |
# OpenAudio S1 (Fish Audio)
|
|
|
|
| 599 |
# },
|
| 600 |
# tonyassi ZeroGPU space of XTTS:
|
| 601 |
'coqui/xtts': {
|
| 602 |
+
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
| 603 |
+
# 'audio': DEFAULT_VOICE_SAMPLE, # voice sample
|
| 604 |
},
|
| 605 |
'collabora/WhisperSpeech': {
|
| 606 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
|
|
|
| 699 |
|
| 700 |
# OpenAudio S1 (Fish Audio)
|
| 701 |
'fishaudio/openaudio-s1-mini': {
|
| 702 |
+
'reference_id': None,
|
| 703 |
+
'reference_audio': handle_file('voice_samples/English.wav'),
|
| 704 |
+
'reference_text': 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
|
|
|
|
|
|
|
| 705 |
'max_new_tokens': 0,
|
| 706 |
'chunk_length': 0,
|
| 707 |
'top_p': 0.9,
|
| 708 |
'repetition_penalty': 1.1,
|
| 709 |
'temperature': 0.9,
|
| 710 |
+
'seed': 1,
|
| 711 |
'use_memory_cache': "on",
|
|
|
|
| 712 |
},
|
| 713 |
|
| 714 |
# F5
|
test_tts_xtts.py
CHANGED
|
@@ -18,7 +18,9 @@ endpoints = client.view_api(all_endpoints=True, print_info=False, return_format=
|
|
| 18 |
# )
|
| 19 |
# tony's space
|
| 20 |
result = client.predict(
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
| 24 |
)
|
|
|
|
| 18 |
# )
|
| 19 |
# tony's space
|
| 20 |
result = client.predict(
|
| 21 |
+
"Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
|
| 22 |
+
'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample
|
| 23 |
+
# fn_index=1
|
| 24 |
+
# handle_file('https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'), # voice sample
|
| 25 |
+
# api_name="/predict"
|
| 26 |
)
|