Spaces:
Running
Running
| """ | |
| This file contains the functions to translate the text from one language to another. | |
| """ | |
| from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
| from deep_translator import GoogleTranslator, MyMemoryTranslator, MicrosoftTranslator, YandexTranslator, ChatGptTranslator | |
| from .text_preprocess import decontracting_words, space_punc | |
| from dotenv import load_dotenv | |
| import os | |
# Read the .env file so its entries are available as environment variables.
load_dotenv()

# API keys for the hosted translators; a key is None when its variable is unset.
MICROSOFT_API_KEY = os.environ.get("MICROSOFT_TRANSLATOR_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
YANDEX_API_KEY = os.environ.get("YANDEX_API_KEY")
# Digit translation: Bengali numerals paired position-by-position with the
# ASCII digits 0-9.
digit_converter = dict(zip("০১২৩৪৫৬৭৮৯", "0123456789"))
def get_translated_digit(sentence):
    """
    Replace every Bengali digit in ``sentence`` with its English counterpart.

    Characters that are not keys of ``digit_converter`` are kept unchanged.
    Returns the rebuilt string.
    """
    return "".join(digit_converter.get(char, char) for char in sentence)
# Bangla to English translation (BUET BanglaNMT).
# The tokenizer and the model are both loaded at import time from the same
# pretrained checkpoint.
_BANGLANMT_BN_EN_CHECKPOINT = "csebuetnlp/banglat5_nmt_bn_en"
translation_tokenizer_bn_en = AutoTokenizer.from_pretrained(_BANGLANMT_BN_EN_CHECKPOINT)
translation_model_bn_en = AutoModelForSeq2SeqLM.from_pretrained(_BANGLANMT_BN_EN_CHECKPOINT)
def banglanmt_translation(input_text):
    """
    Translate a sentence from Bengali to English using BUET BanglaNMT.

    The text is tokenized into PyTorch tensors, passed to the model's
    ``generate`` with no extra generation arguments, and the first generated
    sequence is decoded with special tokens skipped.
    """
    encoded = translation_tokenizer_bn_en(input_text, return_tensors="pt")
    generated = translation_model_bn_en.generate(**encoded)
    return translation_tokenizer_bn_en.decode(generated[0], skip_special_tokens=True)
def google_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence from one language to another using Google Translator.

    Requires the ``deep-translator`` package
    (``pip install -U deep-translator``).

    Args:
        sentence: Text to translate.
        source: Language code of the input text.
        target: Language code of the translation.

    Returns:
        The translated sentence.
    """
    # deep-translator selects the language pair in the constructor; passing
    # source/target to translate() left the translator on its defaults, so the
    # arguments of this function were silently ignored.
    translator = GoogleTranslator(source=source, target=target)
    return translator.translate(sentence)
def microsoft_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence from one language to another using Microsoft Translator.

    Requires the ``deep-translator`` package
    (``pip install -U deep-translator``) and the ``MICROSOFT_TRANSLATOR_KEY``
    environment variable, read into ``MICROSOFT_API_KEY``.

    Args:
        sentence: Text to translate.
        source: Language code of the input text.
        target: Language code of the translation.

    Returns:
        The translated sentence.
    """
    # Forward the caller's language pair; the target used to be hard-coded to
    # 'en' and the source was never passed on.
    translator = MicrosoftTranslator(
        api_key=MICROSOFT_API_KEY, source=source, target=target)
    return translator.translate(sentence)
def chatgpt_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence into ``target`` using the ChatGPT translator.

    Requires the ``deep-translator`` package
    (``pip install -U deep-translator``). The translator is built with
    ``OPENAI_API_KEY`` and ``target`` only; ``source`` is accepted so the
    signature matches the other translators but is not forwarded.
    """
    return ChatGptTranslator(api_key=OPENAI_API_KEY, target=target).translate(sentence)
def yandex_translation(sentence: str, source="bn", target="en") -> str:
    """
    Translate a sentence from one language to another using Yandex Translator.

    Requires the ``deep-translator`` package
    (``pip install -U deep-translator``) and the ``YANDEX_API_KEY``
    environment variable.

    Args:
        sentence: Text to translate.
        source: Language code of the input text.
        target: Language code of the translation.

    Returns:
        The translated sentence.
    """
    # The language pair belongs to the constructor; given to translate() it
    # was not applied, so the translator ran with its own default languages
    # instead of the requested pair.
    translator = YandexTranslator(
        source=source, target=target, api_key=YANDEX_API_KEY)
    return translator.translate(sentence)
def mymemory_translation(sentence: str, source="bn-IN", target="en-US") -> str:
    """
    Translate a sentence from ``source`` to ``target`` using MyMemory Translator.

    Requires the ``deep-translator`` package
    (``pip install -U deep-translator``). The defaults are the region-qualified
    codes ``bn-IN`` and ``en-US``.
    """
    return MyMemoryTranslator(source=source, target=target).translate(sentence)
def get_better_translation(translator_func, src=""):
    """
    Translate ``src`` with ``translator_func`` and post-process the result.

    The source is first passed through ``get_translated_digit``. The
    translation is then run through ``decontracting_words``, and the
    substrings 'rupees' and 'Rs' are replaced with 'takas', in that order.
    """
    prepared_src = get_translated_digit(src)
    translated = decontracting_words(translator_func(prepared_src))
    for currency_word in ("rupees", "Rs"):
        translated = translated.replace(currency_word, "takas")
    return translated
def select_translator(src, translator):
    """
    Translate ``src`` with the translator chosen by name.

    Recognised names are "Google", "BanglaNMT" and "MyMemory". Returns a tuple
    ``(tgt_base, tgt)``: the translator's output for the untouched ``src``, and
    the output of ``get_better_translation`` passed through ``space_punc``.
    Both are None when the name is not recognised. The selected translator is
    called twice, once for each element.
    """
    available = (
        ("Google", google_translation),
        ("BanglaNMT", banglanmt_translation),
        ("MyMemory", mymemory_translation),
    )
    for label, translate in available:
        if translator == label:
            # Post-processed translation first, then the plain one.
            improved = space_punc(get_better_translation(translate, src))
            baseline = translate(src)
            return baseline, improved
    return None, None