neutts-air

Running

App Files Files Community

StorageDater committed 16 days ago

Commit

d76158a

verified ·

1 Parent(s): 2617f51

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -38

app.py CHANGED Viewed

@@ -1,17 +1,27 @@
 import spaces
 import os
 import sys
-sys.path.append("neutts-air")
-from neuttsair.neutts import NeuTTSAir
 import numpy as np
 import gradio as gr
-SAMPLES_PATH = os.path.join(os.getcwd(), "neutts-air", "samples")
-DEFAULT_REF_TEXT = "So I'm live on radio. And I say, well, my dear friend James here clearly, and the whole room just froze. Turns out I'd completely misspoken and mentioned our other friend."
 DEFAULT_REF_PATH = os.path.join(SAMPLES_PATH, "dave.wav")
 DEFAULT_GEN_TEXT = "My name is Dave, and um, I'm from London."
-# --- Force CPU usage ---
 tts = NeuTTSAir(
     backbone_repo="neuphonic/neutts-air",
     backbone_device="cpu",
@@ -19,42 +29,82 @@ tts = NeuTTSAir(
     codec_device="cpu"
 )
-def infer(
-    ref_text: str,
-    ref_audio_path: str,
-    gen_text: str,
-) -> tuple[int, np.ndarray]:
-    """
-    Generates speech using NeuTTS-Air given a reference audio and text, and new text to synthesize.
-    Args:
-        ref_text (str): The text corresponding to the reference audio.
-        ref_audio_path (str): The file path to the reference audio.
-        gen_text (str): The new text to synthesize.
-    Returns:
-        tuple [int, np.ndarray]: A tuple containing the sample rate (24000) and the generated audio waveform as a numpy array.
-    """
-    gr.Info("Starting inference request (CPU mode)!")
-    gr.Info("Encoding reference...")
     ref_codes = tts.encode_reference(ref_audio_path)
-    gr.Info(f"Generating audio for input text: {gen_text}")
     wav = tts.infer(gen_text, ref_codes, ref_text)
-    return (24_000, wav)
-demo = gr.Interface(
-    fn=infer,
-    inputs=[
-        gr.Textbox(label="Reference Text", value=DEFAULT_REF_TEXT),
-        gr.Audio(type="filepath", label="Reference Audio", value=DEFAULT_REF_PATH),
-        gr.Textbox(label="Text to Generate", value=DEFAULT_GEN_TEXT),
-    ],
-    outputs=gr.Audio(type="numpy", label="Generated Speech"),
-    title="NeuTTS-Air☁️ (CPU Mode)",
-    description="Upload a reference audio sample, provide the reference text, and enter new text to synthesize (running on CPU)."
-)
 if __name__ == "__main__":
-    demo.launch(allowed_paths=[SAMPLES_PATH], mcp_server=True, inbrowser=True)

 import spaces
 import os
 import sys
+import time
+import json
 import numpy as np
 import gradio as gr
+import soundfile as sf
+from datetime import datetime
+sys.path.append("neutts-air")
+from neuttsair.neutts import NeuTTSAir
+# === Base paths ===
+BASE_PATH = os.getcwd()
+SAMPLES_PATH = os.path.join(BASE_PATH, "neutts-air", "samples")
+# Generated audio and its JSON metadata are written here by infer().
+HISTORY_PATH = os.path.join(BASE_PATH, "history")
+os.makedirs(HISTORY_PATH, exist_ok=True)
+# Reference text is passed to tts.infer() together with the reference audio, so the
+# default must be the full sentence that accompanies dave.wav, not an abbreviation.
+DEFAULT_REF_TEXT = "So I'm live on radio. And I say, well, my dear friend James here clearly, and the whole room just froze. Turns out I'd completely misspoken and mentioned our other friend."
 DEFAULT_REF_PATH = os.path.join(SAMPLES_PATH, "dave.wav")
 DEFAULT_GEN_TEXT = "My name is Dave, and um, I'm from London."
+# === Initialise NeuTTS-Air on CPU ===
 tts = NeuTTSAir(
     backbone_repo="neuphonic/neutts-air",
     backbone_device="cpu",
     codec_device="cpu"
 )
+# === Main inference handler ===
+def infer(ref_text, ref_audio_path, gen_text):
+    """Synthesize ``gen_text`` from a reference clip and archive the result.
+
+    The generated waveform is written to ``HISTORY_PATH`` as ``tts_<timestamp>.wav``
+    and a ``<timestamp>.json`` metadata file is written next to it.
+
+    Args:
+        ref_text: Text corresponding to the reference audio.
+        ref_audio_path: File path of the reference audio.
+        gen_text: New text to synthesize.
+
+    Returns:
+        A 3-tuple ``(sample_rate, waveform, status_message)``. The sample rate is
+        always 24000. A caller feeding a ``gr.Audio`` output must pair the first
+        two items into a single ``(sample_rate, waveform)`` value.
+
+    Raises:
+        gr.Error: If the reference audio or the text to generate is missing.
+    """
+    # Fail with a readable message instead of an opaque error from the model call.
+    if not ref_audio_path:
+        raise gr.Error("Reference Audio is required.")
+    if not gen_text or not gen_text.strip():
+        raise gr.Error("Text to Generate is required.")
+    gr.Info("Bắt đầu xử lý (CPU mode)...")
+    # Encode the reference audio
     ref_codes = tts.encode_reference(ref_audio_path)
+    # Generate the new audio
     wav = tts.infer(gen_text, ref_codes, ref_text)
+    sr = 24000
+    # === Save to history ===
+    # Microsecond resolution: with whole seconds, two requests finishing in the
+    # same second would overwrite each other's .wav and .json files.
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+    fname = f"tts_{timestamp}.wav"
+    fpath = os.path.join(HISTORY_PATH, fname)
+    sf.write(fpath, wav, sr)
+    # Save the metadata that load_history() reads back
+    meta = {
+        "timestamp": timestamp,
+        "ref_text": ref_text,
+        "gen_text": gen_text,
+        "ref_audio": os.path.basename(ref_audio_path),
+        "output_audio": fname,
+    }
+    with open(os.path.join(HISTORY_PATH, f"{timestamp}.json"), "w", encoding="utf-8") as f:
+        json.dump(meta, f, ensure_ascii=False, indent=2)
+    gr.Info(f"Đã lưu file: {fpath}")
+    return sr, wav, f"Lưu thành công: {fname}"
+# === History reload handler ===
+def load_history():
+    """Render the saved generations in ``HISTORY_PATH`` as an HTML list.
+
+    Every ``*.json`` metadata file is read; an entry is listed only when the
+    audio file it names still exists. Entries appear in reverse filename order.
+
+    Returns:
+        An HTML string with one ``<li>`` per entry, or a plain message when
+        there is nothing to show.
+    """
+    # Local import: keeps this function self-contained without touching the
+    # file's import block. Aliased because "html" is a natural local name here.
+    import html as html_lib
+    items = []
+    for file in sorted(os.listdir(HISTORY_PATH)):
+        if not file.endswith(".json"):
+            continue
+        try:
+            with open(os.path.join(HISTORY_PATH, file), "r", encoding="utf-8") as f:
+                data = json.load(f)
+            timestamp, gen_text, audio_name = data["timestamp"], data["gen_text"], data["output_audio"]
+            wav_path = os.path.join(HISTORY_PATH, audio_name)
+        except (OSError, ValueError, KeyError, TypeError):
+            # One unreadable, half-written or malformed record must not take
+            # down the whole history view; skip it.
+            continue
+        if os.path.exists(wav_path):
+            items.append((timestamp, gen_text, wav_path))
+    if not items:
+        return "Chưa có lịch sử nào."
+    rows = []
+    for t, text, path in reversed(items):
+        # gen_text is user input and ends up in the page, so everything that
+        # is interpolated into markup or attributes is HTML-escaped.
+        safe_t = html_lib.escape(str(t))
+        safe_text = html_lib.escape(str(text))
+        safe_path = html_lib.escape(path)
+        rel = html_lib.escape(os.path.basename(path))
+        # NOTE(review): the "file/" URL prefix is kept from the original; the
+        # file-serving route differs between Gradio versions - confirm it
+        # resolves on the version this Space runs.
+        rows.append(
+            f"<li><b>{safe_t}</b>: {safe_text} - <a href='file/{safe_path}' download='{rel}'>Tải</a></li>"
+        )
+    return "<h4>Lịch sử đã tạo:</h4><ul>" + "".join(rows) + "</ul>"
+# === Gradio interface ===
+with gr.Blocks(title="NeuTTS-Air☁️ CPU Mode + Auto History") as demo:
+    gr.Markdown("## 🌀 NeuTTS-Air (CPU Mode) — Lưu tự động & tải lại lịch sử")
+    with gr.Row():
+        ref_text = gr.Textbox(label="Reference Text", value=DEFAULT_REF_TEXT)
+        ref_audio = gr.Audio(type="filepath", label="Reference Audio", value=DEFAULT_REF_PATH)
+        gen_text = gr.Textbox(label="Text to Generate", value=DEFAULT_GEN_TEXT)
+    output_audio = gr.Audio(type="numpy", label="Generated Speech")
+    info_text = gr.Textbox(label="Kết quả / Trạng thái")
+    gen_btn = gr.Button("🎤 Generate & Save")
+    # infer() returns three values (sample rate, waveform, status message), while
+    # a handler must return exactly one value per output component. Mapping the
+    # three values straight onto the outputs would hand the audio player the bare
+    # sample rate, so repack them as ((sample_rate, waveform), status).
+    def _generate_and_save(ref_text_value, ref_audio_value, gen_text_value):
+        sample_rate, waveform, status = infer(ref_text_value, ref_audio_value, gen_text_value)
+        return (sample_rate, waveform), status
+    gen_btn.click(_generate_and_save, inputs=[ref_text, ref_audio, gen_text], outputs=[output_audio, info_text])
+    hist_html = gr.HTML()
+    reload_btn = gr.Button("🔄 Tải lại lịch sử")
+    reload_btn.click(load_history, outputs=hist_html)
+    # Populate the history list when the page is opened
+    demo.load(load_history, outputs=hist_html)
 if __name__ == "__main__":
+    # Script entry point: announce start-up on stdout, then start the Gradio app.
+    print("Chạy nền NeuTTS-Air CPU Mode (có lưu lịch sử)...")
+    # Both directories are passed as allowed_paths: bundled samples and saved history.
+    served_dirs = [SAMPLES_PATH, HISTORY_PATH]
+    demo.launch(allowed_paths=served_dirs, inbrowser=True)