multi_parler_tts

Running on Zero

ylacombe committed on Apr 26, 2024

Commit

a07119c

verified ·

1 Parent(s): 2c30452

Add Number Normalization and other fix (#8)

- Add Number Normalization and other fix (516bd700f5b25af6edb3a3137e6232af3ac64375)
- Update app.py (25da4ce1606f14882a69312fb8f597c079d3e5f1)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import spaces
 import gradio as gr
 import torch
 from parler_tts import ParlerTTSForConditionalGeneration
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
@@ -38,11 +42,31 @@ examples = [
     ],
 ]
-@spaces.GPU
 def gen_tts(text, description):
     inputs = tokenizer(description, return_tensors="pt").to(device)
-    prompt = tokenizer(text, return_tensors="pt").to(device)
     set_seed(SEED)
     generation = model.generate(
@@ -145,4 +169,4 @@ with gr.Blocks(css=css) as block:
     )
 block.queue()
-block.launch(share=True)

 import spaces
 import gradio as gr
 import torch
+from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
+from string import punctuation
+import re
 from parler_tts import ParlerTTSForConditionalGeneration
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
     ],
 ]
+number_normalizer = EnglishNumberNormalizer()
+def preprocess(text):
+    """Prepare a raw prompt string before it is tokenized.
+
+    Steps, in order:
+      1. Run the text through the module-level ``number_normalizer``
+         (an ``EnglishNumberNormalizer`` instance) and strip surrounding
+         whitespace.
+      2. Replace every hyphen with a space.
+      3. Append a full stop when the text does not already end with a
+         punctuation character.
+      4. Rewrite upper-case abbreviations (optionally dotted, e.g. ``USA``
+         or ``U.S.A``) as space-separated letters (``U S A``).
+
+    Args:
+        text: The prompt to clean up.
+
+    Returns:
+        The cleaned prompt. If nothing is left after normalization and
+        stripping, the empty string is returned unchanged.
+    """
+    text = number_normalizer(text).strip()
+    text = text.replace("-", " ")
+    # Guard: indexing text[-1] below would raise IndexError on empty input.
+    if not text:
+        return text
+    if text[-1] not in punctuation:
+        text = f"{text}."
+
+    def separate_abb(match):
+        # Drop the dots, then put a space between the remaining letters.
+        return " ".join(match.group(0).replace(".", ""))
+
+    # Substitute each match at its own position. A global str.replace per
+    # abbreviation would also rewrite the prefix of longer tokens
+    # (e.g. "US" inside "USA"), leaving them half-expanded.
+    return re.sub(r'\b[A-Z][A-Z\.]+\b', separate_abb, text)
 def gen_tts(text, description):
     inputs = tokenizer(description, return_tensors="pt").to(device)
+    prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
     set_seed(SEED)
     generation = model.generate(
     )
 block.queue()
+block.launch(share=True)