StorageDater committed on
Commit
d76158a
·
verified ·
1 Parent(s): 2617f51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -38
app.py CHANGED
@@ -1,17 +1,27 @@
1
  import spaces
2
  import os
3
  import sys
4
- sys.path.append("neutts-air")
5
- from neuttsair.neutts import NeuTTSAir
6
  import numpy as np
7
  import gradio as gr
 
 
 
 
 
8
 
9
- SAMPLES_PATH = os.path.join(os.getcwd(), "neutts-air", "samples")
10
- DEFAULT_REF_TEXT = "So I'm live on radio. And I say, well, my dear friend James here clearly, and the whole room just froze. Turns out I'd completely misspoken and mentioned our other friend."
 
 
 
 
 
11
  DEFAULT_REF_PATH = os.path.join(SAMPLES_PATH, "dave.wav")
12
  DEFAULT_GEN_TEXT = "My name is Dave, and um, I'm from London."
13
 
14
- # --- Force CPU usage ---
15
  tts = NeuTTSAir(
16
  backbone_repo="neuphonic/neutts-air",
17
  backbone_device="cpu",
@@ -19,42 +29,82 @@ tts = NeuTTSAir(
19
  codec_device="cpu"
20
  )
21
 
22
- def infer(
23
- ref_text: str,
24
- ref_audio_path: str,
25
- gen_text: str,
26
- ) -> tuple[int, np.ndarray]:
27
- """
28
- Generates speech using NeuTTS-Air given a reference audio and text, and new text to synthesize.
29
-
30
- Args:
31
- ref_text (str): The text corresponding to the reference audio.
32
- ref_audio_path (str): The file path to the reference audio.
33
- gen_text (str): The new text to synthesize.
34
- Returns:
35
- tuple [int, np.ndarray]: A tuple containing the sample rate (24000) and the generated audio waveform as a numpy array.
36
- """
37
-
38
- gr.Info("Starting inference request (CPU mode)!")
39
- gr.Info("Encoding reference...")
40
  ref_codes = tts.encode_reference(ref_audio_path)
41
 
42
- gr.Info(f"Generating audio for input text: {gen_text}")
43
  wav = tts.infer(gen_text, ref_codes, ref_text)
 
44
 
45
- return (24_000, wav)
46
-
47
- demo = gr.Interface(
48
- fn=infer,
49
- inputs=[
50
- gr.Textbox(label="Reference Text", value=DEFAULT_REF_TEXT),
51
- gr.Audio(type="filepath", label="Reference Audio", value=DEFAULT_REF_PATH),
52
- gr.Textbox(label="Text to Generate", value=DEFAULT_GEN_TEXT),
53
- ],
54
- outputs=gr.Audio(type="numpy", label="Generated Speech"),
55
- title="NeuTTS-Air☁️ (CPU Mode)",
56
- description="Upload a reference audio sample, provide the reference text, and enter new text to synthesize (running on CPU)."
57
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  if __name__ == "__main__":
60
- demo.launch(allowed_paths=[SAMPLES_PATH], mcp_server=True, inbrowser=True)
 
 
1
  import spaces
2
  import os
3
  import sys
4
+ import time
5
+ import json
6
  import numpy as np
7
  import gradio as gr
8
+ import soundfile as sf
9
+ from datetime import datetime
10
+
11
+ sys.path.append("neutts-air")
12
+ from neuttsair.neutts import NeuTTSAir
13
 
14
# === Base paths ===
# All paths are resolved relative to the current working directory.
BASE_PATH = os.getcwd()
SAMPLES_PATH = os.path.join(BASE_PATH, "neutts-air", "samples")
HISTORY_PATH = os.path.join(BASE_PATH, "history")
# Generated clips and their metadata are written here; create it up front.
os.makedirs(HISTORY_PATH, exist_ok=True)

# Default reference text, passed to the model together with the default
# reference clip below. The previous revision of this file used this full
# sentence; a truncated "So I'm live on radio..." placeholder no longer
# corresponds to the clip.
# NOTE(review): presumably this is the exact transcript of dave.wav - confirm.
DEFAULT_REF_TEXT = (
    "So I'm live on radio. And I say, well, my dear friend James here "
    "clearly, and the whole room just froze. Turns out I'd completely "
    "misspoken and mentioned our other friend."
)
DEFAULT_REF_PATH = os.path.join(SAMPLES_PATH, "dave.wav")
DEFAULT_GEN_TEXT = "My name is Dave, and um, I'm from London."
23
 
24
+ # === Khởi tạo NeuTTS-Air ở CPU ===
25
  tts = NeuTTSAir(
26
  backbone_repo="neuphonic/neutts-air",
27
  backbone_device="cpu",
 
29
  codec_device="cpu"
30
  )
31
 
32
# === Main processing function ===
def _unique_history_stem(timestamp):
    """Return a file stem based on *timestamp* that is unused in HISTORY_PATH.

    The timestamp has one-second resolution, so two generations finishing in
    the same second would otherwise overwrite each other's files. A numeric
    suffix (``_1``, ``_2``, ...) is appended only when a collision exists.
    """
    stem = timestamp
    attempt = 0
    while (
        os.path.exists(os.path.join(HISTORY_PATH, f"tts_{stem}.wav"))
        or os.path.exists(os.path.join(HISTORY_PATH, f"{stem}.json"))
    ):
        attempt += 1
        stem = f"{timestamp}_{attempt}"
    return stem


def infer(ref_text, ref_audio_path, gen_text):
    """Synthesize *gen_text* with the reference voice and archive the result.

    The generated audio is written to ``HISTORY_PATH`` as ``tts_<stem>.wav``
    and a ``<stem>.json`` metadata file is stored next to it.

    Args:
        ref_text: Text passed to the model alongside the reference audio.
        ref_audio_path: File path of the reference audio clip.
        gen_text: The new text to synthesize.

    Returns:
        A 3-tuple ``(sample_rate, waveform, status_message)`` where
        ``sample_rate`` is 24000, ``waveform`` is whatever ``tts.infer``
        returned, and ``status_message`` names the saved file.

    Raises:
        gr.Error: If no reference audio was provided.
    """
    # Fail early with a user-visible message instead of a TypeError deeper in
    # the pipeline (e.g. os.path.basename(None)) when the audio input is empty.
    if not ref_audio_path:
        raise gr.Error("Vui lòng cung cấp file âm thanh tham chiếu.")

    gr.Info("Bắt đầu xử lý (CPU mode)...")

    # Encode the reference audio
    ref_codes = tts.encode_reference(ref_audio_path)

    # Generate the new audio
    wav = tts.infer(gen_text, ref_codes, ref_text)
    sr = 24000

    # === Save to history ===
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    stem = _unique_history_stem(timestamp)
    fname = f"tts_{stem}.wav"
    fpath = os.path.join(HISTORY_PATH, fname)

    sf.write(fpath, wav, sr)

    # Save metadata describing this generation
    meta = {
        "timestamp": timestamp,
        "ref_text": ref_text,
        "gen_text": gen_text,
        "ref_audio": os.path.basename(ref_audio_path),
        "output_audio": fname,
    }

    with open(os.path.join(HISTORY_PATH, f"{stem}.json"), "w", encoding="utf-8") as f:
        json.dump(meta, f, ensure_ascii=False, indent=2)

    gr.Info(f"Đã lưu file: {fpath}")
    return sr, wav, f"Lưu thành công: {fname}"
64
+
65
# === Reload the saved history ===
def load_history():
    """Render the generations stored in ``HISTORY_PATH`` as an HTML list.

    Every ``*.json`` metadata file in ``HISTORY_PATH`` is read; an entry is
    listed only if the audio file it names still exists. Entries are shown in
    reverse filename order.

    Returns:
        An HTML string with one ``<li>`` per entry (timestamp, generated text
        and a download link), or a plain "no history" message when nothing
        can be listed.
    """
    # Function-scope import so the stdlib module is in scope without touching
    # the file's import section.
    from html import escape

    entries = []
    for name in sorted(os.listdir(HISTORY_PATH)):
        if not name.endswith(".json"):
            continue
        try:
            with open(os.path.join(HISTORY_PATH, name), "r", encoding="utf-8") as fh:
                meta = json.load(fh)
            wav_path = os.path.join(HISTORY_PATH, meta["output_audio"])
            entry = (meta["timestamp"], meta["gen_text"], wav_path)
        except (OSError, ValueError, KeyError, TypeError):
            # One unreadable or malformed metadata file must not take down
            # the whole history panel; skip it and keep listing the rest.
            continue
        if os.path.exists(wav_path):
            entries.append(entry)

    if not entries:
        return "Chưa có lịch sử nào."

    # gen_text is user input: escape everything interpolated into the markup
    # so it cannot inject HTML or break out of the quoted attributes.
    # NOTE(review): the "file/<path>" URL prefix depends on the Gradio
    # version in use - confirm it matches the deployed version's file route.
    rows = [
        f"<li><b>{escape(str(stamp))}</b>: {escape(str(text))} - "
        f"<a href='file/{escape(path)}' "
        f"download='{escape(os.path.basename(path))}'>Tải</a></li>"
        for stamp, text, path in reversed(entries)
    ]
    return "<h4>Lịch sử đã tạo:</h4><ul>" + "".join(rows) + "</ul>"
85
+
86
# === Gradio interface ===
def _generate_for_ui(ref_text_value, ref_audio_value, gen_text_value):
    """Adapt ``infer``'s result to the two output components of the UI.

    ``infer`` returns ``(sample_rate, waveform, status)``. A numpy-typed
    ``gr.Audio`` output takes the pair ``(sample_rate, waveform)`` as one
    value, so the first two items are regrouped and the status message goes
    to the textbox.
    """
    sample_rate, waveform, status = infer(ref_text_value, ref_audio_value, gen_text_value)
    return (sample_rate, waveform), status


with gr.Blocks(title="NeuTTS-Air☁️ CPU Mode + Auto History") as demo:
    gr.Markdown("## 🌀 NeuTTS-Air (CPU Mode) — Lưu tự động & tải lại lịch sử")

    with gr.Row():
        ref_text = gr.Textbox(label="Reference Text", value=DEFAULT_REF_TEXT)
        ref_audio = gr.Audio(type="filepath", label="Reference Audio", value=DEFAULT_REF_PATH)
        gen_text = gr.Textbox(label="Text to Generate", value=DEFAULT_GEN_TEXT)

    output_audio = gr.Audio(type="numpy", label="Generated Speech")
    info_text = gr.Textbox(label="Kết quả / Trạng thái")

    gen_btn = gr.Button("🎤 Generate & Save")
    # One return value per output component: the audio pair, then the status.
    gen_btn.click(
        _generate_for_ui,
        inputs=[ref_text, ref_audio, gen_text],
        outputs=[output_audio, info_text],
    )

    hist_html = gr.HTML()
    reload_btn = gr.Button("🔄 Tải lại lịch sử")
    reload_btn.click(load_history, outputs=hist_html)

    # Populate the history panel when the page is opened
    demo.load(load_history, outputs=hist_html)

if __name__ == "__main__":
    print("Chạy nền NeuTTS-Air CPU Mode (có lưu lịch sử)...")
    # allowed_paths lists the sample clips and the saved history directory.
    demo.launch(allowed_paths=[SAMPLES_PATH, HISTORY_PATH], inbrowser=True)