fix type annotation and add ngram filtering (#7)
Fix type annotation and add ngram_filtering (412664701c7f9488addf9005fd9a5a81a0d0179c)
Co-authored-by: hysts <hysts@users.noreply.huggingface.co>
app.py CHANGED
```diff
@@ -1,6 +1,7 @@
+from __future__ import annotations
+
 import os
 
-from typing import Union
 import gradio as gr
 import numpy as np
 import torch
@@ -49,14 +50,14 @@ translator = Translator(
 def predict(
     task_name: str,
     audio_source: str,
-    input_audio_mic: Union[str, None],
-    input_audio_file: Union[str, None],
-    input_text: Union[str, None],
-    source_language: Union[str, None],
+    input_audio_mic: str | None,
+    input_audio_file: str | None,
+    input_text: str | None,
+    source_language: str | None,
     target_language: str,
-) -> tuple[str, str]:
+) -> tuple[tuple[int, np.ndarray] | None, str]:
     task_name = task_name.split()[0]
-    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
+    source_language_code = LANGUAGE_NAME_TO_CODE[source_language] if source_language else None
     target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
 
     if task_name in ["S2ST", "S2TT", "ASR"]:
@@ -79,6 +80,7 @@ def predict(
         task_str=task_name,
         tgt_lang=target_language_code,
         src_lang=source_language_code,
+        ngram_filtering=True,
     )
     if task_name in ["S2ST", "T2ST"]:
         return (sr, wav.cpu().detach().numpy()), text_out
@@ -86,7 +88,7 @@ def predict(
     return None, text_out
 
 
-def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[str, str]:
+def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="S2ST",
         audio_source="file",
@@ -98,7 +100,7 @@ def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[s
     )
 
 
-def process_s2tt_example(input_audio_file: str, target_language: str) -> tuple[str, str]:
+def process_s2tt_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="S2TT",
         audio_source="file",
@@ -110,7 +112,9 @@ def process_s2tt_example(input_audio_file: str, target_language: str) -> tuple[s
     )
 
 
-def process_t2st_example(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
+def process_t2st_example(
+    input_text: str, source_language: str, target_language: str
+) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="T2ST",
         audio_source="",
@@ -122,7 +126,9 @@ def process_t2st_example(input_text: str, source_language: str, target_language:
     )
 
 
-def process_t2tt_example(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
+def process_t2tt_example(
+    input_text: str, source_language: str, target_language: str
+) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="T2TT",
         audio_source="",
@@ -134,7 +140,7 @@ def process_t2tt_example(input_text: str, source_language: str, target_language:
     )
 
 
-def process_asr_example(input_audio_file: str, target_language: str) -> tuple[str, str]:
+def process_asr_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
     return predict(
         task_name="ASR",
         audio_source="file",
@@ -317,10 +323,16 @@ with gr.Blocks(css="style.css") as demo:
             examples=[
                 ["My favorite animal is the elephant.", "English", "French"],
                 ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
-                [
-                …
-                …
-                …
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Hindi",
+                ],
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Spanish",
+                ],
             ],
             inputs=[input_text, source_language, target_language],
             outputs=[output_audio, output_text],
@@ -332,10 +344,16 @@ with gr.Blocks(css="style.css") as demo:
             examples=[
                 ["My favorite animal is the elephant.", "English", "French"],
                 ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
-                [
-                …
-                …
-                …
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Hindi",
+                ],
+                [
+                    "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                    "English",
+                    "Spanish",
+                ],
             ],
             inputs=[input_text, source_language, target_language],
             outputs=[output_audio, output_text],
```
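In short, the commit replaces `typing.Union` annotations with PEP 604 `str | None` syntax (kept safe on older interpreters by `from __future__ import annotations`), corrects the return annotations to match what `predict` actually returns, guards the `LANGUAGE_NAME_TO_CODE` lookup when no source language is selected, and turns on n-gram filtering in the underlying translation call. Below is a minimal sketch of the resulting call pattern, assuming the `Translator` API of the `seamless_communication` package this Space builds on; the model and vocoder names, the device choice, and the three-value return order are assumptions for illustration, not part of the diff:

```python
from __future__ import annotations

import numpy as np
import torch
from seamless_communication.models.inference import Translator

# Assumed setup: model/vocoder names and device are illustrative, not from the diff.
translator = Translator(
    "seamlessM4T_large",
    "vocoder_36langs",
    torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
)


def run_task(
    task_name: str,
    input_data: str,
    target_language_code: str,
    source_language_code: str | None,  # PEP 604 syntax; valid at runtime via the future import
) -> tuple[tuple[int, np.ndarray] | None, str]:
    # ngram_filtering=True asks the decoder to suppress repeated n-grams in its
    # output -- the behavior change this commit enables for every task.
    # The (text_out, wav, sr) unpacking order is assumed from typical
    # seamless_communication usage.
    text_out, wav, sr = translator.predict(
        input_data,
        task_str=task_name,
        tgt_lang=target_language_code,
        src_lang=source_language_code,
        ngram_filtering=True,
    )
    if task_name in ["S2ST", "T2ST"]:
        # Speech output: (sample_rate, waveform) suits a Gradio Audio component.
        return (sr, wav.cpu().detach().numpy()), str(text_out)
    # Text-only tasks (S2TT, T2TT, ASR) produce no audio.
    return None, str(text_out)
```

The `if source_language else None` guard in the diff exists presumably because the speech-input tasks infer the source language from the audio, so `src_lang` may legitimately be `None` there; only the text-input tasks require a source-language selection.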