diego2554 committed on
Commit
1a3d72c
·
verified ·
1 Parent(s): 9c33716

Update app.py

Browse files

Enhancements added:

1. **Emotion Categories & Submenus**: Emotions are now organized into "Common" and "Complex" categories for easier selection.
2. **Dropdown + Textbox Sync**: Selecting an emotion from the dropdown automatically updates the emotion textbox; users can still type custom emotions.
3. **Temporary File Cleanup**: Generated audio files are automatically cleaned up on exit.
4. **Retry Logic**: Audio generation is attempted up to 3 times in total (with a 1-second pause between attempts) in case of network/API issues.
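5. **NSFW Check (Optional)**: The check function is included, but the call to it is commented out; it can be re-enabled if needed. Note that the `NSFW_API_URL_TEMPLATE` secret is still required at startup even while the check is disabled.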
6. **Improved UX**: More intuitive and robust interface for Hugging Face Spaces with categories and pre-defined complex emotions.

Files changed (1) hide show
  1. app.py +91 -123
app.py CHANGED
@@ -5,164 +5,145 @@ import random
5
  import urllib.parse
6
  import tempfile
7
  import os
 
 
8
 
9
  NSFW_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
10
  TTS_URL_TEMPLATE = os.getenv("TTS_API_URL_TEMPLATE")
11
 
12
-
13
  if not NSFW_URL_TEMPLATE:
14
  raise ValueError("Missing Secret: NSFW_API_URL_TEMPLATE is not set in Hugging Face Space secrets.")
15
  if not TTS_URL_TEMPLATE:
16
  raise ValueError("Missing Secret: TTS_API_URL_TEMPLATE is not set in Hugging Face Space secrets.")
 
17
  # VOICES
18
  VOICES = [
19
- "alloy", "echo", "fable", "onyx", "nova", "shimmer", # Standard OpenAI Voices
20
- "coral", "verse", "ballad", "ash", "sage", "amuch", "dan" # Some additional pre-trained
21
  ]
22
 
23
-
24
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
def check_nsfw(prompt: str) -> bool:
    """Ask the remote safety endpoint whether ``prompt`` is NSFW.

    The prompt is URL-quoted, substituted into ``NSFW_URL_TEMPLATE`` and
    fetched with a 20-second timeout. The response body is stripped and
    upper-cased before being compared with "YES" / "NO".

    Returns:
        ``True`` for "YES" and for any unrecognised answer (treated as
        potentially NSFW); ``False`` for "NO".

    Raises:
        gr.Error: If the HTTP request fails or any other error occurs.
    """
    try:
        target = NSFW_URL_TEMPLATE.format(prompt=urllib.parse.quote(prompt))
        # Only the part before '?' is logged so query parameters stay hidden.
        print(f"DEBUG: Checking NSFW URL: {target.split('?')[0]}... (query params hidden)")

        reply = requests.get(target, timeout=20)
        reply.raise_for_status()

        verdict = reply.text.strip().upper()
        print(f"DEBUG: NSFW Check Response: '{verdict}'")

        if verdict in ("YES", "NO"):
            return verdict == "YES"

        # Anything other than a clean YES/NO is treated as potentially NSFW.
        print(f"Warning: Unexpected response from NSFW checker: {reply.text}")
        return True
    except requests.exceptions.RequestException as e:
        print(f"Error during NSFW check: {e}")
        raise gr.Error("Failed to check prompt safety.")
    except Exception as e:
        print(f"Unexpected error during NSFW check: {e}")
        raise gr.Error("An unexpected error occurred during safety check. Please wait for a second and try again.")
52
-
53
 
 
54
def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
    """Request synthesized speech from the TTS endpoint and return the raw bytes.

    ``prompt`` and ``emotion`` are URL-quoted; all four values are substituted
    into ``TTS_URL_TEMPLATE`` and the resulting URL is fetched with a
    60-second timeout.

    Args:
        prompt: Text to convert to speech.
        voice: Voice identifier, inserted into the URL as-is.
        emotion: Emotion/style description.
        seed: Seed value inserted into the URL.

    Returns:
        The response body when its content type contains "audio".

    Raises:
        gr.Error: If the request fails, the response is not audio, or any
            other error occurs.
    """
    try:
        url = TTS_URL_TEMPLATE.format(
            prompt=urllib.parse.quote(prompt),
            emotion=urllib.parse.quote(emotion),
            voice=voice,
            seed=seed,
        )
        # Only the part before '?' is logged so query parameters stay hidden.
        print(f"DEBUG: Generating Audio URL: {url.split('?')[0]}... (query params hidden)")

        response = requests.get(url, timeout=60)
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()
        if 'audio' not in content_type:
            print(f"Warning: Unexpected content type received: {content_type}")
            print(f"Response Text: {response.text[:500]}")
            raise gr.Error("API did not return audio.")

        return response.content

    except gr.Error:
        # Let the specific "not audio" error through; without this clause the
        # generic handler below would replace it with an "unexpected" message.
        raise
    except requests.exceptions.RequestException as e:
        print(f"Error during audio generation: {e}")
        failed_response = getattr(e, 'response', None)
        if failed_response is not None:
            # Keep the server's explanation in the logs only, not in the UI.
            print(f"Response details: {failed_response.text[:200]}")
        raise gr.Error("Failed to generate audio. Please wait for a second and try again.") from e
    except Exception as e:
        print(f"Unexpected error during audio generation: {e}")
        raise gr.Error("An unexpected error occurred during audio generation. Please wait for a second and try again.") from e
89
-
90
-
91
-
92
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int):
    """Gradio handler: validate the inputs, generate audio, and save it to a temp file.

    Args:
        prompt: Text to convert to speech; must be non-empty.
        voice: Voice identifier; must be non-empty.
        emotion: Emotion/style description; defaults to "neutral" when empty.
        use_random_seed: When true, a random seed in ``[0, 2**32 - 1]`` is used.
        specific_seed: Seed used when ``use_random_seed`` is false.

    Returns:
        A ``(audio_file_path, status_message)`` tuple. The path is ``None``
        when generation fails or the prompt is flagged.

    Raises:
        gr.Error: If the prompt, the voice, or a required seed is missing.
    """
    # Validate before logging: concatenating a missing (None) prompt would
    # raise TypeError instead of the intended user-facing error.
    if not prompt:
        raise gr.Error("Prompt cannot be empty.")
    print("\n\n\n" + prompt + "\n\n\n")

    if not emotion:
        emotion = "neutral"
        print("Warning: No emotion provided, defaulting to 'neutral'.")
    if not voice:
        raise gr.Error("Please select a voice.")

    if use_random_seed:
        seed = random.randint(0, 2**32 - 1)
    else:
        # An emptied number field arrives as None; int(None) would crash.
        if specific_seed is None:
            raise gr.Error("Please enter a seed or enable 'Use Random Seed'.")
        seed = int(specific_seed)
    print(f"Using Seed: {seed}")

    # The remote safety check is currently disabled; swap the assignment below
    # for the commented call to re-enable it.
    print("Checking prompt safety...")
    # is_nsfw = check_nsfw(prompt)
    is_nsfw = False

    if is_nsfw:
        print("Prompt flagged as inappropriate.")
        return None, "Error: The prompt was flagged as inappropriate and cannot be processed."

    print("Prompt is safe. Generating audio...")
    try:
        audio_bytes = generate_audio(prompt, voice, emotion, seed)

        # delete=False keeps the file on disk so the path can be handed back
        # to the audio component after this handler returns.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            temp_file_path = temp_audio_file.name
        print(f"Audio saved temporarily to: {temp_file_path}")

        return temp_file_path, f"Audio generated successfully with voice '{voice}', emotion '{emotion}', and seed {seed}."

    except gr.Error as e:
        # Prefer the plain message attribute when available; str(e) may be a
        # quoted repr of the message.
        return None, getattr(e, "message", None) or str(e)
    except Exception as e:
        print(f"Unexpected error in main function: {e}")
        return None, f"An unexpected error occurred: {e}"
136
-
137
 
 
 
 
138
 
 
 
 
139
 
 
140
def toggle_seed_input(use_random_seed):
    """Build the update for the seed field when the random-seed checkbox changes.

    The field is visible only while random seeding is off, and its value is
    reset to 12345 on every toggle.
    """
    show_seed_field = not use_random_seed
    return gr.update(visible=show_seed_field, value=12345)
143
 
 
144
  with gr.Blocks() as app:
145
  gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
146
- gr.Markdown(
147
- """Enter text, choose a voice and emotion, and generate audio.
148
- The text will be checked for appropriateness before generation.
149
- Use it as much as you want.
150
-
151
-
152
- **Like & follow** for more AI projects:
153
-
154
-
155
- • Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
156
- • X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
157
- • Discord: nihal_gazi_io"""
158
- )
159
 
160
  with gr.Row():
161
  with gr.Column(scale=2):
162
- prompt_input = gr.Textbox(label="Prompt", placeholder="Enter the text you want to convert to speech...")
163
- emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
164
- voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
 
 
 
 
 
 
165
  with gr.Column(scale=1):
 
166
  random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
167
  seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)
168
 
@@ -172,27 +153,17 @@ with gr.Blocks() as app:
172
  audio_output = gr.Audio(label="Generated Audio", type="filepath")
173
  status_output = gr.Textbox(label="Status")
174
 
175
-
176
- random_seed_checkbox.change(
177
- fn=toggle_seed_input,
178
- inputs=[random_seed_checkbox],
179
- outputs=[seed_input]
180
- )
181
-
182
  submit_button.click(
183
  fn=text_to_speech_app,
184
- inputs=[
185
- prompt_input,
186
- voice_dropdown,
187
- emotion_input,
188
- random_seed_checkbox,
189
- seed_input
190
- ],
191
  outputs=[audio_output, status_output],
192
  concurrency_limit=30
193
  )
194
 
195
-
196
  gr.Examples(
197
  examples=[
198
  ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345],
@@ -203,14 +174,11 @@ with gr.Blocks() as app:
203
  inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
204
  outputs=[audio_output, status_output],
205
  fn=text_to_speech_app,
206
- cache_examples=False,
207
  )
208
 
209
-
210
  if __name__ == "__main__":
211
-
212
  if NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE:
213
  app.launch()
214
  else:
215
  print("ERROR: Cannot launch app. Required API URL secrets are missing.")
216
-
 
5
  import urllib.parse
6
  import tempfile
7
  import os
8
+ import atexit
9
+ import time
10
 
11
  NSFW_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
12
  TTS_URL_TEMPLATE = os.getenv("TTS_API_URL_TEMPLATE")
13
 
 
14
  if not NSFW_URL_TEMPLATE:
15
  raise ValueError("Missing Secret: NSFW_API_URL_TEMPLATE is not set in Hugging Face Space secrets.")
16
  if not TTS_URL_TEMPLATE:
17
  raise ValueError("Missing Secret: TTS_API_URL_TEMPLATE is not set in Hugging Face Space secrets.")
18
+
19
  # VOICES
20
  VOICES = [
21
+ "alloy", "echo", "fable", "onyx", "nova", "shimmer",
22
+ "coral", "verse", "ballad", "ash", "sage", "amuch", "dan"
23
  ]
24
 
25
+ # EMOTION CATEGORIES
26
+ EMOTION_CATEGORIES = {
27
+ "Common": ["neutral", "happy", "sad", "excited", "angry", "calm", "fearful", "joyful", "surprised"],
28
+ "Complex": [
29
+ "sarcastic", "sarcastic and mocking", "sad and depressed, with stammering",
30
+ "excited and joyful", "angry and frustrated", "calm and soothing",
31
+ "nervous and anxious", "happy and relieved", "fearful and tense"
32
+ ]
33
+ }
34
+
35
# Paths of generated audio files; they are deleted when the interpreter exits.
# NOTE: files are only removed at exit, so this list (and the temp directory)
# keeps growing for as long as the process runs.
temp_files = []


def cleanup_temp_files():
    """Delete every file recorded in ``temp_files`` and empty the list.

    Cleanup is best-effort: a file that is already gone is skipped silently,
    and any other OS-level failure is logged without stopping the remaining
    deletions. Draining the list makes a repeated call a no-op.
    """
    while temp_files:
        path = temp_files.pop()
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already removed (for example by the OS or the framework).
            pass
        except OSError as exc:
            print(f"Warning: could not remove temporary file {path}: {exc}")


atexit.register(cleanup_temp_files)
46
+
47
# NSFW check
def check_nsfw(prompt: str) -> bool:
    """Ask the remote safety endpoint whether ``prompt`` is NSFW.

    The prompt is URL-quoted, substituted into ``NSFW_URL_TEMPLATE`` and
    fetched with a 20-second timeout. The check fails closed: only an
    explicit "NO" (case-insensitive, surrounding whitespace ignored) marks
    the prompt as safe.

    Returns:
        ``False`` when the endpoint answers "NO"; ``True`` when it answers
        "YES", answers anything else, or the request fails.
    """
    try:
        encoded_prompt = urllib.parse.quote(prompt)
        url = NSFW_URL_TEMPLATE.format(prompt=encoded_prompt)
        response = requests.get(url, timeout=20)
        response.raise_for_status()
        result = response.text.strip().upper()
    except requests.exceptions.RequestException as e:
        print(f"Error during NSFW check: {e}")
        return True
    except Exception as e:
        print(f"Unexpected error during NSFW check: {e}")
        return True

    if result == "NO":
        return False
    if result != "YES":
        # An unrecognised body (such as an error page served with HTTP 200)
        # must not be read as "safe", matching the error paths above.
        print(f"Warning: Unexpected response from NSFW checker: {result[:200]}")
    return True
 
62
 
63
# Audio generation with retries
def generate_audio(prompt: str, voice: str, emotion: str, seed: int,
                   max_attempts: int = 3, retry_delay: float = 1.0) -> bytes:
    """Request synthesized speech from the TTS endpoint, retrying on request errors.

    ``prompt`` and ``emotion`` are URL-quoted; all four values are substituted
    into ``TTS_URL_TEMPLATE`` and the URL is fetched with a 60-second timeout.

    Args:
        prompt: Text to convert to speech.
        voice: Voice identifier, inserted into the URL as-is.
        emotion: Emotion/style description.
        seed: Seed value inserted into the URL.
        max_attempts: Total number of requests to try (default 3).
        retry_delay: Seconds to wait between failed attempts (default 1.0).

    Returns:
        The response body when its content type contains "audio".

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        gr.Error: If the response is not audio (raised immediately, not
            retried), or if every attempt fails with a request error.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1.")

    encoded_prompt = urllib.parse.quote(prompt)
    encoded_emotion = urllib.parse.quote(emotion)
    url = TTS_URL_TEMPLATE.format(
        prompt=encoded_prompt,
        emotion=encoded_emotion,
        voice=voice,
        seed=seed,
    )

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            content_type = response.headers.get('content-type', '').lower()
            if 'audio' not in content_type:
                print(f"Warning: Unexpected content type: {content_type}")
                print(f"Response Text: {response.text[:500]}")
                raise gr.Error("API did not return audio.")
            return response.content
        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt} failed: {e}")
            if attempt == max_attempts:
                # Chain the last failure so the root cause stays in the traceback.
                raise gr.Error(f"Failed to generate audio after {max_attempts} attempts.") from e
            time.sleep(retry_delay)
88
+
89
# Main handler
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int):
    """Gradio handler: validate the inputs, generate audio, and save it to a temp file.

    Args:
        prompt: Text to convert to speech; must contain non-whitespace text.
        voice: Voice identifier; must be non-empty.
        emotion: Emotion/style description; defaults to "neutral" when empty.
        use_random_seed: When true, a random seed in ``[0, 2**32 - 1]`` is used.
        specific_seed: Seed used when ``use_random_seed`` is false.

    Returns:
        A ``(audio_file_path, status_message)`` tuple. The path is ``None``
        when generation fails or the prompt is flagged.

    Raises:
        gr.Error: If the prompt, the voice, or a required seed is missing.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")
    if not emotion:
        emotion = "neutral"
    if not voice:
        raise gr.Error("Please select a voice.")

    if use_random_seed:
        seed = random.randint(0, 2**32 - 1)
    else:
        # An emptied number field arrives as None; int(None) would crash.
        if specific_seed is None:
            raise gr.Error("Please enter a seed or enable 'Use Random Seed'.")
        seed = int(specific_seed)

    # NSFW check (optional): swap the assignment below for the commented call
    # to enable it.
    # is_nsfw = check_nsfw(prompt)
    is_nsfw = False
    if is_nsfw:
        return None, "Error: The prompt was flagged as inappropriate and cannot be processed."

    try:
        audio_bytes = generate_audio(prompt, voice, emotion, seed)
        # delete=False keeps the file on disk so the path can be handed back
        # to the audio component after this handler returns.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            temp_file_path = temp_audio_file.name
        # Record the path so the exit hook can delete the file later.
        temp_files.append(temp_file_path)
        return temp_file_path, f"Audio generated successfully with voice '{voice}', emotion '{emotion}', and seed {seed}."
    except gr.Error as e:
        # Prefer the plain message attribute when available; str(e) may be a
        # quoted repr of the message.
        return None, getattr(e, "message", None) or str(e)
    except Exception as e:
        return None, f"Unexpected error: {e}"
 
 
116
 
117
# Dropdown -> textbox synchronisation
def update_emotion_textbox(selected_emotion):
    """Return the emotion dropdown's selection unchanged.

    Wired to the emotion dropdown's change event so that the chosen value is
    copied into the free-text emotion box.
    """
    return selected_emotion
120
 
121
# Refresh the emotion dropdown when the category changes
def update_emotion_options(category):
    """Build the update that repopulates the emotion dropdown for ``category``.

    Args:
        category: A key of ``EMOTION_CATEGORIES``.

    Returns:
        A ``gr.update`` whose choices are the category's emotions and whose
        value is the first of them. For a missing, unknown, or empty category
        the choices are empty and the value is ``None`` instead of raising
        ``KeyError`` / ``IndexError``.
    """
    options = EMOTION_CATEGORIES.get(category) or []
    first_option = options[0] if options else None
    return gr.update(choices=options, value=first_option)
124
 
125
# Toggle seed input
def toggle_seed_input(use_random_seed):
    """Build the update for the seed field when the random-seed checkbox changes.

    The field is shown only while random seeding is disabled; its value is
    reset to 12345 on every toggle.
    """
    seed_field_visible = not use_random_seed
    return gr.update(visible=seed_field_visible, value=12345)
128
 
129
+ # Gradio UI
130
  with gr.Blocks() as app:
131
  gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
132
+ gr.Markdown("Enter text, choose a voice and emotion, and generate audio.")
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  with gr.Row():
135
  with gr.Column(scale=2):
136
+ prompt_input = gr.Textbox(label="Prompt", placeholder="Type the text here...")
137
+
138
+ with gr.Row():
139
+ emotion_input = gr.Textbox(label="Emotion Style", placeholder="Type an emotion or select from the dropdown...")
140
+
141
+ with gr.Column():
142
+ category_dropdown = gr.Dropdown(label="Emotion Category", choices=list(EMOTION_CATEGORIES.keys()), value="Common", interactive=True)
143
+ emotion_dropdown = gr.Dropdown(label="Select Emotion", choices=EMOTION_CATEGORIES["Common"], value="neutral", interactive=True)
144
+
145
  with gr.Column(scale=1):
146
+ voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
147
  random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
148
  seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)
149
 
 
153
  audio_output = gr.Audio(label="Generated Audio", type="filepath")
154
  status_output = gr.Textbox(label="Status")
155
 
156
+ # Events
157
+ category_dropdown.change(fn=update_emotion_options, inputs=[category_dropdown], outputs=[emotion_dropdown])
158
+ emotion_dropdown.change(fn=update_emotion_textbox, inputs=[emotion_dropdown], outputs=[emotion_input])
159
+ random_seed_checkbox.change(fn=toggle_seed_input, inputs=[random_seed_checkbox], outputs=[seed_input])
 
 
 
160
  submit_button.click(
161
  fn=text_to_speech_app,
162
+ inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
 
 
 
 
 
 
163
  outputs=[audio_output, status_output],
164
  concurrency_limit=30
165
  )
166
 
 
167
  gr.Examples(
168
  examples=[
169
  ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345],
 
174
  inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
175
  outputs=[audio_output, status_output],
176
  fn=text_to_speech_app,
177
+ cache_examples=False,
178
  )
179
 
 
180
  if __name__ == "__main__":
 
181
  if NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE:
182
  app.launch()
183
  else:
184
  print("ERROR: Cannot launch app. Required API URL secrets are missing.")