import warnings

from cryptography.utils import CryptographyDeprecationWarning

# paramiko emits a CryptographyDeprecationWarning on import; suppress it.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=CryptographyDeprecationWarning)
    import paramiko

import gradio as gr
from transformers import AutoTokenizer, PreTrainedTokenizerFast

from transformer_mt.modeling_transformer import TransfomerEncoderDecoderModel
from transformer_mt_roberta.modeling_transformer_final import TransfomerEncoderDecoderModel as mt_roberta

# Earlier experiment using the generic HF translation pipeline, kept for reference:
#from transformers import pipeline
#translation_pipeline = pipeline('translation_en_to_fr')
#def translator_fn(text_input):
#    results = translation_pipeline(text_input)
#    return results[0]['translation_text']


# Setting up the translation transformers in Gradio.
def translator_fn_baseline(text_in):
    # Baseline Danish->English encoder-decoder with custom source/target
    # tokenizers. Tokenizers and weights are reloaded on every call.
    source_tokenizer = PreTrainedTokenizerFast.from_pretrained("da_en_output_dir/da_tokenizer")
    target_tokenizer = PreTrainedTokenizerFast.from_pretrained("da_en_output_dir/en_tokenizer")
    model = TransfomerEncoderDecoderModel.from_pretrained("da_en_output_dir")
    input_ids = source_tokenizer.encode(text_in, return_tensors="pt")
    output_ids = model.generate(
        input_ids,
        # Cap the output length at the input word count plus a small margin.
        max_length=len(text_in.split()) + 3,
        bos_token_id=target_tokenizer.bos_token_id,
        eos_token_id=target_tokenizer.eos_token_id,
        pad_token_id=target_tokenizer.pad_token_id,
    )
    return target_tokenizer.decode(output_ids[0])


def translator_fn_roberta(text_in):
    # Variant whose encoder side uses the pretrained Danish RoBERTa tokenizer;
    # the English target tokenizer is shared with the baseline model.
    source_tokenizer_pretrained_roberta = AutoTokenizer.from_pretrained("flax-community/roberta-base-danish")
    target_tokenizer_pretrained_roberta = PreTrainedTokenizerFast.from_pretrained("da_en_output_dir/en_tokenizer")
    model_pretrained_roberta = mt_roberta.from_pretrained("da_en_RoBERTa_pretrained")
    input_ids_pretrained_roberta = source_tokenizer_pretrained_roberta.encode(text_in, return_tensors="pt")
    output_ids_pretrained_roberta = model_pretrained_roberta.generate(
        input_ids_pretrained_roberta,
        max_length=len(text_in.split()) + 3,
        bos_token_id=target_tokenizer_pretrained_roberta.bos_token_id,
        eos_token_id=target_tokenizer_pretrained_roberta.eos_token_id,
        pad_token_id=target_tokenizer_pretrained_roberta.pad_token_id,
    )
    return target_tokenizer_pretrained_roberta.decode(output_ids_pretrained_roberta[0])
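

# Both translator functions above reload tokenizers and model weights on every
# call, so each request pays the full loading cost. A minimal caching sketch
# follows; load_baseline is a hypothetical helper added for illustration
# (nothing above uses it), and the paths match translator_fn_baseline.
from functools import lru_cache

@lru_cache(maxsize=None)
def load_baseline():
    # Load once; subsequent calls return the cached tuple.
    src_tok = PreTrainedTokenizerFast.from_pretrained("da_en_output_dir/da_tokenizer")
    tgt_tok = PreTrainedTokenizerFast.from_pretrained("da_en_output_dir/en_tokenizer")
    model = TransfomerEncoderDecoderModel.from_pretrained("da_en_output_dir")
    return src_tok, tgt_tok, model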


iface = gr.Interface(
    fn=[translator_fn_baseline, translator_fn_roberta],
    inputs=gr.inputs.Textbox(lines=2, placeholder=None, label="Your Danish text goes here."),
    # The outputs list should match the number of values each fn returns;
    # with two fns, one input produces two text outputs side by side.
    outputs=['text'],
    description="This app translates text from Danish to English.",
    title="Danish to English Translator App",
    theme="peach")
iface.launch(share=False, enable_queue=True)
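
# Quick local sanity check (illustrative Danish input; the actual output
# depends on the trained checkpoints):
#   >>> translator_fn_baseline("Jeg er glad")
#   'i am happy'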