Spaces:
Running
on
Zero
Running
on
Zero
Add MCP server (#6)
Browse files
- Add MCP server (778db97e13636a827e72e88a60aaf9960bb3b2d7)
Co-authored-by: Apolinário from multimodal AI art <multimodalart@users.noreply.huggingface.co>
app.py
CHANGED
|
@@ -212,7 +212,16 @@ def normalize_text(transcript: str):
|
|
| 212 |
|
| 213 |
@spaces.GPU
|
| 214 |
def initialize_engine(model_path, audio_tokenizer_path) -> bool:
|
| 215 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
global engine
|
| 217 |
try:
|
| 218 |
logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
|
|
@@ -301,7 +310,26 @@ def text_to_speech(
|
|
| 301 |
ras_win_len=7,
|
| 302 |
ras_win_max_num_repeat=2,
|
| 303 |
):
|
| 304 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
global engine
|
| 306 |
|
| 307 |
if engine is None:
|
|
@@ -518,6 +546,15 @@ def create_ui():
|
|
| 518 |
|
| 519 |
# Function to play voice sample when clicking on a row
|
| 520 |
def play_voice_sample(evt: gr.SelectData):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 521 |
try:
|
| 522 |
# Get the preset name from the clicked row
|
| 523 |
preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
|
|
@@ -541,6 +578,16 @@ def create_ui():
|
|
| 541 |
|
| 542 |
# Function to handle template selection
|
| 543 |
def apply_template(template_name):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
if template_name in PREDEFINED_EXAMPLES:
|
| 545 |
template = PREDEFINED_EXAMPLES[template_name]
|
| 546 |
# Enable voice preset and custom reference only for voice-clone template
|
|
@@ -642,8 +689,8 @@ def main():
|
|
| 642 |
|
| 643 |
# Create and launch the UI
|
| 644 |
demo = create_ui()
|
| 645 |
-
demo.launch(server_name=args.host, server_port=args.port)
|
| 646 |
|
| 647 |
|
| 648 |
if __name__ == "__main__":
|
| 649 |
-
main()
|
|
|
|
| 212 |
|
| 213 |
@spaces.GPU
|
| 214 |
def initialize_engine(model_path, audio_tokenizer_path) -> bool:
|
| 215 |
+
"""
|
| 216 |
+
Initialize the HiggsAudioServeEngine with the specified model and tokenizer.
|
| 217 |
+
|
| 218 |
+
Args:
|
| 219 |
+
model_path: Path to the model to load
|
| 220 |
+
audio_tokenizer_path: Path to the audio tokenizer to load
|
| 221 |
+
|
| 222 |
+
Returns:
|
| 223 |
+
True if initialization was successful, False otherwise
|
| 224 |
+
"""
|
| 225 |
global engine
|
| 226 |
try:
|
| 227 |
logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
|
|
|
|
| 310 |
ras_win_len=7,
|
| 311 |
ras_win_max_num_repeat=2,
|
| 312 |
):
|
| 313 |
+
"""
|
| 314 |
+
Convert text to speech using HiggsAudioServeEngine.
|
| 315 |
+
|
| 316 |
+
Args:
|
| 317 |
+
text: The text to convert to speech
|
| 318 |
+
voice_preset: The voice preset to use (or "EMPTY" for no preset)
|
| 319 |
+
reference_audio: Optional path to reference audio file
|
| 320 |
+
reference_text: Optional transcript of the reference audio
|
| 321 |
+
max_completion_tokens: Maximum number of tokens to generate
|
| 322 |
+
temperature: Sampling temperature for generation
|
| 323 |
+
top_p: Top-p sampling parameter
|
| 324 |
+
top_k: Top-k sampling parameter
|
| 325 |
+
system_prompt: System prompt to guide the model
|
| 326 |
+
stop_strings: Dataframe containing stop strings
|
| 327 |
+
ras_win_len: Window length for repetition avoidance sampling
|
| 328 |
+
ras_win_max_num_repeat: Maximum number of repetitions allowed in the window
|
| 329 |
+
|
| 330 |
+
Returns:
|
| 331 |
+
Tuple of (generated_text, (sample_rate, audio_data)) where audio_data is int16 numpy array
|
| 332 |
+
"""
|
| 333 |
global engine
|
| 334 |
|
| 335 |
if engine is None:
|
|
|
|
| 546 |
|
| 547 |
# Function to play voice sample when clicking on a row
|
| 548 |
def play_voice_sample(evt: gr.SelectData):
|
| 549 |
+
"""
|
| 550 |
+
Play a voice sample when a row is clicked in the voice samples table.
|
| 551 |
+
|
| 552 |
+
Args:
|
| 553 |
+
evt: The select event containing the clicked row index
|
| 554 |
+
|
| 555 |
+
Returns:
|
| 556 |
+
Path to the voice sample audio file, or None if not found
|
| 557 |
+
"""
|
| 558 |
try:
|
| 559 |
# Get the preset name from the clicked row
|
| 560 |
preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
|
|
|
|
| 578 |
|
| 579 |
# Function to handle template selection
|
| 580 |
def apply_template(template_name):
|
| 581 |
+
"""
|
| 582 |
+
Apply a predefined template to the UI components.
|
| 583 |
+
|
| 584 |
+
Args:
|
| 585 |
+
template_name: Name of the template to apply
|
| 586 |
+
|
| 587 |
+
Returns:
|
| 588 |
+
Tuple of updated values for system_prompt, input_text, template_description,
|
| 589 |
+
voice_preset, custom_reference_accordion, voice_samples_section, and ras_win_len
|
| 590 |
+
"""
|
| 591 |
if template_name in PREDEFINED_EXAMPLES:
|
| 592 |
template = PREDEFINED_EXAMPLES[template_name]
|
| 593 |
# Enable voice preset and custom reference only for voice-clone template
|
|
|
|
| 689 |
|
| 690 |
# Create and launch the UI
|
| 691 |
demo = create_ui()
|
| 692 |
+
demo.launch(server_name=args.host, server_port=args.port, mcp_server=True)
|
| 693 |
|
| 694 |
|
| 695 |
if __name__ == "__main__":
|
| 696 |
+
main()
|