File size: 4,589 Bytes
b522a6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import gradio as gr
import uuid
import os
import gzip
import subprocess
import datetime
import time
import soundfile as sf

from app_utils import tts_interface, models

# === Only keep the 'برنا' model ===
# Entries of `models` are indexed positionally: index 2 is used as the voice id
# handed to tts_interface and index 3 is shown in the status text.
# NOTE(review): indices 0 and 2 are presumably a display name and an
# identifier -- confirm against app_utils.
borna_model = next(
    (model for model in models if "برنا" in model[0] or "برنا" in model[2]),
    None,
)
if borna_model is None:
    # A bare next() would surface as an unexplained StopIteration at import
    # time; fail with a message that says what is actually missing.
    raise RuntimeError("No 'برنا' voice found in app_utils.models")
voice_id = borna_model[2]
voice_url = borna_model[3]

# === Paths ===
# Directory for generated audio and scratch files; created on import.
STATIC_DIR = "static"
os.makedirs(STATIC_DIR, exist_ok=True)
# Gzip-compressed, tab-separated lexicon of user-submitted pronunciations.
PHONETIC_FILE = os.path.join(STATIC_DIR, "fa_extra.txt.gz")

# === TTS Function ===
def tts(text):
    """Synthesize speech for *text* and report its espeak-ng phonemes.

    Args:
        text: The text to speak. ``None`` is treated as an empty string.

    Returns:
        A ``(wav_path, status)`` tuple: the path of the 16-bit PCM WAV file
        written under ``STATIC_DIR`` and a status string containing
        ``voice_url`` and the phoneme transcription printed by espeak-ng.
    """
    clean_text = (text or "").strip()

    # Use a per-request scratch file. A fixed "input.txt" is shared with
    # phonemize() and with any overlapping request, so one request's text
    # could be overwritten before espeak-ng gets to read it.
    input_file = os.path.join(STATIC_DIR, f"{uuid.uuid4().hex}.txt")
    try:
        with open(input_file, "w", encoding="utf-8") as f:
            f.write(clean_text)

        # -x prints phoneme mnemonics, -q suppresses audio output.
        phonemes_result = subprocess.run(
            ["espeak-ng", "-v", "fa", "-x", "-q", "-f", input_file],
            capture_output=True,
            text=True,
        )
    finally:
        # Best-effort cleanup; the file may not exist if open() itself failed.
        try:
            os.remove(input_file)
        except FileNotFoundError:
            pass
    phonemes = phonemes_result.stdout.strip()

    # Run TTS. tts_interface returns ((sample_rate, audio_data), extra); the
    # second element is unused here.
    # NOTE(review): meaning of the empty third argument is not visible in
    # this file -- confirm against app_utils.
    (sample_rate, audio_data), _ = tts_interface(voice_id, clean_text, '')
    out_path = os.path.join(STATIC_DIR, f"{uuid.uuid4().hex}.wav")
    sf.write(out_path, audio_data, samplerate=sample_rate, subtype="PCM_16")

    status = f"مدل: {voice_url}\nآوانگاشت: {phonemes}"
    return out_path, status

# === Phonemizer + Save ===
def phonemize(word, phonetic, task):
    """Phonemize and speak a word with espeak-ng, optionally saving an override.

    Args:
        word: The word (or text) to process. ``None`` is treated as empty.
        phonetic: Optional custom phoneme string. When given and *task* is not
            ``"phonemize"``, it is passed to espeak-ng wrapped in ``[[...]]``
            in place of *word*.
        task: ``"phonemize"`` to only transcribe/speak *word*; ``"send"`` to
            also append the ``word<TAB>phonetic`` pair to ``PHONETIC_FILE``.

    Returns:
        A ``(wav_path, phonemes, status)`` tuple: the path of the WAV file
        espeak-ng was asked to write, the phoneme transcription it printed,
        and a status string combining the word and the transcription.
    """
    word_text = word or ""
    phonetic_text = (phonetic or "").strip()

    if task == "phonemize" or not phonetic_text:
        phoneme_target = word_text
    else:
        phoneme_target = f"[[{phonetic_text}]]"

    # Per-request file names. A fixed "input.txt" is shared with tts() and
    # with any overlapping request, so its content could change between the
    # write below and the espeak-ng reads.
    request_id = uuid.uuid4().hex
    input_file = os.path.join(STATIC_DIR, f"{request_id}.txt")
    out_path = os.path.join(STATIC_DIR, f"{request_id}.wav")

    try:
        with open(input_file, "w", encoding="utf-8") as f:
            f.write(phoneme_target)

        # -x prints phoneme mnemonics, -q suppresses audio output.
        result = subprocess.run(
            ["espeak-ng", "-v", "fa", "-x", "-q", "-f", input_file],
            capture_output=True,
            text=True,
        )
        # Argument list instead of a shell string: no quoting pitfalls and no
        # shell involved in running the command.
        subprocess.run(
            ["espeak-ng", "-v", "fa", "-w", out_path, "-f", input_file],
        )
    finally:
        # Best-effort cleanup; the file may not exist if open() itself failed.
        try:
            os.remove(input_file)
        except FileNotFoundError:
            pass
    phonemes = result.stdout.strip()

    if task == "send" and phonetic_text:
        # The lexicon is one "word<TAB>phonetic" pair per line, so collapse
        # embedded tabs/newlines to single spaces to keep each entry intact.
        entry_word = " ".join(word_text.split())
        entry_phonetic = " ".join(phonetic_text.split())
        with gzip.open(PHONETIC_FILE, "at", encoding="utf-8") as f:
            f.write(f"{entry_word}\t{entry_phonetic}\n")

    status = f"متن: {word_text}\nآوانگاشت: {phonemes}"
    return out_path, phonemes, status

# === List saved words ===
def list_words():
    """Return the saved ``(word, phonetic)`` pairs from ``PHONETIC_FILE``.

    Lines beginning with ``//`` are skipped, as are lines that do not split
    into exactly two tab-separated fields after stripping.
    """
    with gzip.open(PHONETIC_FILE, "rt", encoding="utf-8") as archive:
        # Lazily split every non-comment line; consumed before the file closes.
        candidate_rows = (
            raw.strip().split("\t")
            for raw in archive
            if not raw.startswith("//")
        )
        return [
            (fields[0], fields[1])
            for fields in candidate_rows
            if len(fields) == 2
        ]

# === Iran time without pytz ===
def get_iran_time():
    """Return the current time at UTC+03:30 as ``YYYY-MM-DD HH:MM:SS``.

    The offset is applied with a fixed-offset ``datetime.timezone`` so the
    result does not depend on the host's local time zone. Round-tripping
    ``time.gmtime()`` through ``time.mktime``/``time.localtime`` is off by an
    hour whenever the host's zone is observing daylight saving time.
    """
    iran_tz = datetime.timezone(datetime.timedelta(hours=3, minutes=30))
    return datetime.datetime.now(iran_tz).strftime("%Y-%m-%d %H:%M:%S")

# Append a start marker to the lexicon file on every launch. The "//" prefix
# makes it a comment line, which list_words() skips when reading entries.
with gzip.open(PHONETIC_FILE, mode="at", encoding="utf-8") as log_file:
    started_at = get_iran_time()
    log_file.write(f"// started at : {started_at}\n")

# === UI ===

# Build the three-tab Gradio interface. Components are created in the order
# they should appear on the page, so statement order here is the layout.
with gr.Blocks(title="TTS for Persian with Bornaa") as demo:
    gr.Markdown("## متن به گفتار 🗣️")

    # Tab 1: text-to-speech. The button sends the textbox content to tts()
    # and shows the returned audio file and status text.
    with gr.Tab("تبدیل متن به گفتار"):
        text_input = gr.Textbox(label="متن فارسی", lines=2)
        out_audio = gr.Audio(label="خروجی صوتی")
        out_status = gr.Textbox(label="وضعیت مدل", interactive=False)

        btn_tts = gr.Button("تبدیل کن")
        btn_tts.click(tts, inputs=text_input, outputs=[out_audio, out_status])

    # Tab 2: pronunciation editing. The radio value is passed to phonemize()
    # as `task`; "send" additionally stores the word/phonetic pair.
    with gr.Tab("ویرایش تلفظ"):
        word_input = gr.Textbox(label="کلمه", placeholder="مثلاً: سلام")
        phonetic_input = gr.Textbox(label="آوانویسی دلخواه (اختیاری)")
        task_choice = gr.Radio(label="عملیات", choices=["phonemize", "send"], value="phonemize")

        ph_audio = gr.Audio(label="صدای تولیدشده")
        ph_output = gr.Textbox(label="آوانگاشت")
        ph_status = gr.Textbox(label="وضعیت", interactive=False)

        btn_phonemize = gr.Button("انجام")
        # Input/output order must match phonemize()'s parameters and its
        # (wav_path, phonemes, status) return tuple.
        btn_phonemize.click(phonemize,
                            inputs=[word_input, phonetic_input, task_choice],
                            outputs=[ph_audio, ph_output, ph_status])

    # Tab 3: saved entries. The table is filled on demand from list_words().
    with gr.Tab("واژه‌های اخیر"):
        gr.Markdown("### واژه‌های ذخیره‌شده")
        word_table = gr.Dataframe(headers=["کلمه", "آوانویسی"], datatype=["str", "str"])
        btn_load_words = gr.Button("بارگذاری")
        btn_load_words.click(fn=list_words, inputs=[], outputs=[word_table])

# Start the Gradio server when the module is executed or imported.
demo.launch()