# CuhyTTS / app.py
# amirgame197's picture
# Create app.py
# b522a6b verified
# raw
# history blame
# 4.59 kB
import gradio as gr
import uuid
import os
import gzip
import subprocess
import datetime
import time
import soundfile as sf
from app_utils import tts_interface, models
# === Only keep the 'برنا' model ===
# Pick the first entry of `models` whose element 0 or element 2 contains the
# voice name. NOTE(review): entries are indexed positionally; element 2 is
# used as the voice id and element 3 as the model URL below — confirm the
# tuple layout against app_utils.
borna_model = next(
    (model for model in models if "برنا" in model[0] or "برنا" in model[2]),
    None,
)
if borna_model is None:
    # Fail with an explicit message instead of a bare StopIteration at import.
    raise RuntimeError("No voice model matching 'برنا' found in app_utils.models")
voice_id = borna_model[2]
voice_url = borna_model[3]
# === Paths ===
# Directory (relative to the working directory) that receives generated
# audio files and holds the pronunciation lexicon.
STATIC_DIR = "static"
# Gzip-compressed text file of user-submitted pronunciations.
PHONETIC_FILE = os.path.join(STATIC_DIR, "fa_extra.txt.gz")
# Create the directory up front so later writes never fail on a missing path.
os.makedirs(STATIC_DIR, exist_ok=True)
# === TTS Function ===
def tts(text):
    """Synthesize speech for *text* and report its espeak-ng phonemes.

    The phoneme transcription is produced by running ``espeak-ng -v fa -x -q``
    on the text; the audio is produced by ``tts_interface`` with the
    module-level ``voice_id``.

    Args:
        text: Text from the UI textbox; surrounding whitespace is ignored.

    Returns:
        A ``(wav_path, status)`` tuple: the path of the PCM_16 WAV file
        written into ``STATIC_DIR`` and a status string containing the model
        URL and the phoneme transcription.
    """
    clean_text = text.strip()

    # Use a per-call scratch file: a single shared "input.txt" would let
    # concurrent requests overwrite each other's text before espeak-ng reads it.
    input_file = os.path.join(STATIC_DIR, f"{uuid.uuid4().hex}.txt")
    try:
        with open(input_file, "w", encoding="utf-8") as f:
            f.write(clean_text)

        # Run espeak-ng to get phonemes (-x: print phoneme mnemonics,
        # -q: do not produce speech).
        phonemes_result = subprocess.run(
            ["espeak-ng", "-v", "fa", "-x", "-q", "-f", input_file],
            capture_output=True,
            text=True,
        )
    finally:
        # The scratch file is only needed while espeak-ng runs.
        if os.path.exists(input_file):
            os.remove(input_file)
    phonemes = phonemes_result.stdout.strip()

    # Run TTS. The result is unpacked as ((sample_rate, audio_data), extra);
    # the second element is ignored.
    # NOTE(review): the meaning of the empty third argument is not visible
    # here — confirm against app_utils.tts_interface.
    (sample_rate, audio_data), _ = tts_interface(voice_id, clean_text, '')

    out_path = os.path.join(STATIC_DIR, f"{uuid.uuid4().hex}.wav")
    sf.write(out_path, audio_data, samplerate=sample_rate, subtype="PCM_16")

    status = f"مدل: {voice_url}\nآوانگاشت: {phonemes}"
    return out_path, status
# === Phonemizer + Save ===
def phonemize(word, phonetic, task):
    """Preview a pronunciation with espeak-ng and optionally save it.

    Args:
        word: The word to pronounce.
        phonetic: Optional custom transcription. When used, it is wrapped in
            ``[[...]]`` before being handed to espeak-ng.
        task: ``"phonemize"`` transcribes *word* itself. Any other value uses
            *phonetic* when it is non-blank (falling back to *word*
            otherwise). With ``"send"`` and a non-blank *phonetic*, the pair
            is also appended to ``PHONETIC_FILE``.

    Returns:
        A ``(wav_path, phonemes, status)`` tuple: the path of the WAV file
        espeak-ng was asked to write, the phoneme string espeak-ng printed,
        and a status message for the UI.
    """
    custom = phonetic.strip()
    if task == "phonemize" or not custom:
        phoneme_target = word
    else:
        phoneme_target = f"[[{custom}]]"

    # Per-call scratch file so concurrent requests cannot clobber each other.
    input_file = os.path.join(STATIC_DIR, f"{uuid.uuid4().hex}.txt")
    out_path = os.path.join(STATIC_DIR, f"{uuid.uuid4().hex}.wav")
    try:
        with open(input_file, "w", encoding="utf-8") as f:
            f.write(phoneme_target)

        # First pass: print the phoneme mnemonics without producing speech.
        result = subprocess.run(
            ["espeak-ng", "-v", "fa", "-x", "-q", "-f", input_file],
            capture_output=True,
            text=True,
        )
        # Second pass: write the spoken audio to out_path. An argument list
        # avoids going through a shell and any quoting pitfalls.
        subprocess.run(
            ["espeak-ng", "-v", "fa", "-w", out_path, "-f", input_file]
        )
    finally:
        if os.path.exists(input_file):
            os.remove(input_file)
    phonemes = result.stdout.strip()

    if task == "send" and custom:
        # The lexicon is one "word<TAB>phonetic" record per line, so collapse
        # any embedded tabs/newlines that would otherwise split the record.
        entry_word = " ".join(word.split())
        entry_phonetic = " ".join(custom.split())
        if entry_word:
            with gzip.open(PHONETIC_FILE, "at", encoding="utf-8") as f:
                f.write(f"{entry_word}\t{entry_phonetic}\n")

    status = f"متن: {word}\nآوانگاشت: {phonemes}"
    return out_path, phonemes, status
# === List saved words ===
def list_words(path=None):
    """Return the saved ``(word, phonetic)`` pairs from the lexicon file.

    Lines starting with ``//`` are treated as comments and skipped, as are
    lines that do not consist of exactly two tab-separated fields.

    Args:
        path: Gzip-compressed lexicon to read. Defaults to the module-level
            ``PHONETIC_FILE``.

    Returns:
        A list of ``(word, phonetic)`` tuples in file order; an empty list
        when the file does not exist yet.
    """
    if path is None:
        path = PHONETIC_FILE

    entries = []
    try:
        with gzip.open(path, "rt", encoding="utf-8") as f:
            for line in f:
                if line.startswith("//"):
                    continue
                parts = line.strip().split("\t")
                if len(parts) == 2:
                    entries.append((parts[0], parts[1]))
    except FileNotFoundError:
        # Nothing has been saved yet; show an empty table instead of failing.
        return []
    return entries
# === Iran time without pytz ===
def get_iran_time():
    """Return the current time at UTC+03:30 as ``YYYY-MM-DD HH:MM:SS``.

    A fixed-offset ``datetime.timezone`` is used so the result does not
    depend on the host's local timezone or its daylight-saving rules
    (round-tripping UTC fields through ``time.mktime``/``time.localtime``
    does). No daylight-saving adjustment is applied.
    """
    iran_tz = datetime.timezone(datetime.timedelta(hours=3, minutes=30))
    return datetime.datetime.now(iran_tz).strftime("%Y-%m-%d %H:%M:%S")
# Log start
# Runs once at import: append a start marker to the lexicon. The "//" prefix
# makes list_words() treat the line as a comment rather than an entry.
with gzip.open(PHONETIC_FILE, "at", encoding="utf-8") as f:
    f.write(f"// started at : {get_iran_time()}\n")
# === UI ===
# Three-tab Gradio interface: text-to-speech, pronunciation editing, and a
# table of the saved pronunciations. Components and their click handlers are
# declared inside the tab they belong to.
with gr.Blocks(title="TTS for Persian with Bornaa") as demo:
    gr.Markdown("## متن به گفتار 🗣️")

    # Tab 1: synthesize speech via tts().
    with gr.Tab("تبدیل متن به گفتار"):
        text_input = gr.Textbox(label="متن فارسی", lines=2)
        out_audio = gr.Audio(label="خروجی صوتی")
        out_status = gr.Textbox(label="وضعیت مدل", interactive=False)
        btn_tts = gr.Button("تبدیل کن")
        btn_tts.click(tts, inputs=text_input, outputs=[out_audio, out_status])

    # Tab 2: preview a pronunciation and optionally save it via phonemize().
    with gr.Tab("ویرایش تلفظ"):
        word_input = gr.Textbox(label="کلمه", placeholder="مثلاً: سلام")
        phonetic_input = gr.Textbox(label="آوانویسی دلخواه (اختیاری)")
        task_choice = gr.Radio(label="عملیات", choices=["phonemize", "send"], value="phonemize")
        ph_audio = gr.Audio(label="صدای تولیدشده")
        ph_output = gr.Textbox(label="آوانگاشت")
        ph_status = gr.Textbox(label="وضعیت", interactive=False)
        btn_phonemize = gr.Button("انجام")
        btn_phonemize.click(
            phonemize,
            inputs=[word_input, phonetic_input, task_choice],
            outputs=[ph_audio, ph_output, ph_status],
        )

    # Tab 3: list the saved (word, phonetic) pairs via list_words().
    with gr.Tab("واژه‌های اخیر"):
        gr.Markdown("### واژه‌های ذخیره‌شده")
        word_table = gr.Dataframe(headers=["کلمه", "آوانویسی"], datatype=["str", "str"])
        btn_load_words = gr.Button("بارگذاری")
        btn_load_words.click(fn=list_words, inputs=[], outputs=[word_table])

demo.launch()