Spaces:
Sleeping
Sleeping
Fouzi Takelait
committed on
Commit
·
5476c3d
1
Parent(s):
a35448d
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,21 +19,21 @@ from transformer_mt_roberta.modeling_transformer_final import TransfomerEncoderD
|
|
| 19 |
# results = translation_pipeline(text_input)
|
| 20 |
# return results[0]['translation_text']
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
|
| 38 |
def translator_fn_roberta(text_in):
|
| 39 |
source_tokenizer_pretrained_roberta = AutoTokenizer.from_pretrained("flax-community/roberta-base-danish")
|
|
@@ -50,9 +50,9 @@ def translator_fn_roberta(text_in):
|
|
| 50 |
)
|
| 51 |
return target_tokenizer_pretrained_roberta.decode(output_ids_pretrained_roberta[0])
|
| 52 |
|
| 53 |
-
iface = gr.Interface(fn=translator_fn_roberta,
|
| 54 |
inputs=gr.inputs.Textbox(lines=2, placeholder=None, label="Your Danish text goes here."),
|
| 55 |
-
outputs=['text'], # a list should match the number of values returned by fn to have one input and 2 outputs.
|
| 56 |
description = "This App translates text from Danish to the English language.",
|
| 57 |
title = "Danish to English Translator App",
|
| 58 |
theme = "peach")
|
|
|
|
| 19 |
# results = translation_pipeline(text_input)
|
| 20 |
# return results[0]['translation_text']
|
| 21 |
|
| 22 |
+
def translator_fn_baseline(text_in):
|
| 23 |
+
source_tokenizer = PreTrainedTokenizerFast.from_pretrained("da_en_output_dir/da_tokenizer")
|
| 24 |
+
target_tokenizer = PreTrainedTokenizerFast.from_pretrained("da_en_output_dir/en_tokenizer")
|
| 25 |
+
model = TransfomerEncoderDecoderModel.from_pretrained("da_en_output_dir")
|
| 26 |
+
|
| 27 |
+
input_ids = source_tokenizer.encode(text_in, return_tensors="pt")
|
| 28 |
+
output_ids = model.generate(
|
| 29 |
+
input_ids,
|
| 30 |
+
max_length=10,
|
| 31 |
+
bos_token_id=target_tokenizer.bos_token_id,
|
| 32 |
+
eos_token_id=target_tokenizer.eos_token_id,
|
| 33 |
+
pad_token_id=target_tokenizer.pad_token_id,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
return target_tokenizer.decode(output_ids[0])
|
| 37 |
|
| 38 |
def translator_fn_roberta(text_in):
|
| 39 |
source_tokenizer_pretrained_roberta = AutoTokenizer.from_pretrained("flax-community/roberta-base-danish")
|
|
|
|
| 50 |
)
|
| 51 |
return target_tokenizer_pretrained_roberta.decode(output_ids_pretrained_roberta[0])
|
| 52 |
|
| 53 |
+
iface = gr.Interface(fn=[translator_fn_baseline, translator_fn_roberta],
|
| 54 |
inputs=gr.inputs.Textbox(lines=2, placeholder=None, label="Your Danish text goes here."),
|
| 55 |
+
outputs=['text', 'text'], # a list should match the number of values returned by fn to have one input and 2 outputs.
|
| 56 |
description = "This App translates text from Danish to the English language.",
|
| 57 |
title = "Danish to English Translator App",
|
| 58 |
theme = "peach")
|