neutts-air

Running

App Files Files Community

StorageDater committed 16 days ago

Commit

2617f51

verified ·

1 Parent(s): d2078f6

Upload app (1).py

Browse files

Files changed (1) hide show

app (1).py +60 -0

app (1).py ADDED Viewed

	@@ -0,0 +1,60 @@

+import spaces
+import os
+import sys
+sys.path.append("neutts-air")
+from neuttsair.neutts import NeuTTSAir
+import numpy as np
+import gradio as gr
+SAMPLES_PATH = os.path.join(os.getcwd(), "neutts-air", "samples")
+DEFAULT_REF_TEXT = "So I'm live on radio. And I say, well, my dear friend James here clearly, and the whole room just froze. Turns out I'd completely misspoken and mentioned our other friend."
+DEFAULT_REF_PATH = os.path.join(SAMPLES_PATH, "dave.wav")
+DEFAULT_GEN_TEXT = "My name is Dave, and um, I'm from London."
+# --- Force CPU usage ---
+tts = NeuTTSAir(
+    backbone_repo="neuphonic/neutts-air",
+    backbone_device="cpu",
+    codec_repo="neuphonic/neucodec",
+    codec_device="cpu"
+)
+def infer(
+    ref_text: str,
+    ref_audio_path: str,
+    gen_text: str,
+) -> tuple[int, np.ndarray]:
+    """
+    Generates speech using NeuTTS-Air given a reference audio and text, and new text to synthesize.
+    Args:
+        ref_text (str): The text corresponding to the reference audio.
+        ref_audio_path (str): The file path to the reference audio.
+        gen_text (str): The new text to synthesize.
+    Returns:
+        tuple [int, np.ndarray]: A tuple containing the sample rate (24000) and the generated audio waveform as a numpy array.
+    """
+    gr.Info("Starting inference request (CPU mode)!")
+    gr.Info("Encoding reference...")
+    ref_codes = tts.encode_reference(ref_audio_path)
+    gr.Info(f"Generating audio for input text: {gen_text}")
+    wav = tts.infer(gen_text, ref_codes, ref_text)
+    return (24_000, wav)
+demo = gr.Interface(
+    fn=infer,
+    inputs=[
+        gr.Textbox(label="Reference Text", value=DEFAULT_REF_TEXT),
+        gr.Audio(type="filepath", label="Reference Audio", value=DEFAULT_REF_PATH),
+        gr.Textbox(label="Text to Generate", value=DEFAULT_GEN_TEXT),
+    ],
+    outputs=gr.Audio(type="numpy", label="Generated Speech"),
+    title="NeuTTS-Air☁️ (CPU Mode)",
+    description="Upload a reference audio sample, provide the reference text, and enter new text to synthesize (running on CPU)."
+)
+if __name__ == "__main__":
+    demo.launch(allowed_paths=[SAMPLES_PATH], mcp_server=True, inbrowser=True)