Spaces:

naacl-anonymous
/

selective_pre_translation

Runtime error

App Files Files Community

Anonymous committed on Oct 1, 2024

Commit

15f5208

1 Parent(s): 707f578

more fixes

Browse files

Files changed (6) hide show

app.py +1 -1
generate_prompt.py +21 -16
tasks/ner.py +138 -37
tasks/nli.py +15 -18
tasks/qa.py +49 -14
tasks/summarization.py +30 -5

app.py CHANGED Viewed

@@ -131,6 +131,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         elif task == NER:
             text_example = {
                 'tokens': sentence,
             }
         else:
             text_example = {
@@ -138,7 +139,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 'premise': premise
             }
-        print(text_example)
         prompt = construct_generic_prompt(task, instruction, text_example, zero_shot, num_examples, selected_language, dataset, config)
         return prompt

         elif task == NER:
             text_example = {
                 'tokens': sentence,
+                'ner_tags': ''
             }
         else:
             text_example = {
                 'premise': premise
             }
         prompt = construct_generic_prompt(task, instruction, text_example, zero_shot, num_examples, selected_language, dataset, config)
         return prompt

generate_prompt.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import csv
 import enum
 import json
@@ -20,7 +21,7 @@ from langchain.prompts import FewShotPromptTemplate, PromptTemplate
 from tqdm import tqdm
 from yaml.loader import SafeLoader
-from tasks import qa, summarization, ner, nli
 # from models.model_completion import gpt3x_completion, gemini_completion
@@ -47,8 +48,12 @@ def gpt3x_completion(
 ) -> str:
     import os
     import openai
-    os.environ["OPENAI_API_KEY"] = ''
     def get_entities_chatGPT(final_prompt):
         response = openai.ChatCompletion.create(
@@ -67,7 +72,7 @@ def mixtral_completion(prompt):
     url = "https://api.together.xyz/v1/chat/completions"
     # Define your Together API key
-    together_api_key = ""  # Replace with your actual API key
     # Define the request payload
     payload = {
@@ -554,6 +559,7 @@ def construct_generic_prompt(task, instruction, test_example, zero_shot, num_exa
             instruction=instruction,
             test_example=test_example,
             zero_shot=zero_shot,
             num_examples=num_examples,
             lang=str(selected_language).lower(),
             config=config,
@@ -612,31 +618,30 @@ class Config:
 def recommend_config(task, lang, model_type):
     print(task)
     print(model_type)
     language_type = _get_language_type(lang)
-    config = Config()
     print(language_type)
     if task == QA:
         if model_type == ModelType.English.value:
-            config.set(prefix='source', context='source', examples='source', output='source')
         else:
-            config.set(prefix='english', context='source', examples='source', output='source')
     if task == NER:
         if model_type == ModelType.English.value:
-            config.set(prefix='source', context='source', examples='source', output='source')
         elif language_type == LanguageType.High:
-            config.set(prefix='english', context='source', examples='source', output='source')
         else:
-            config.set(prefix='english', context='source', examples='source', output='english')
     if task == NLI:
         if model_type == ModelType.English.value:
-            config.set(prefix='source', context='source', examples='source', output='source')
         elif language_type == LanguageType.High:
-            print("here")
-            config.set(prefix='english', context='source', examples='english')
         else:
-            print("here1")
-            config.set(prefix='english', context='english', examples='english')
     if task == SUMMARIZATION:
-        config.set(context='english')
     return config.to_dict()

+import collections
 import csv
 import enum
 import json
 from tqdm import tqdm
 from yaml.loader import SafeLoader
+from tasks import ner, summarization, qa, nli
 # from models.model_completion import gpt3x_completion, gemini_completion
 ) -> str:
     import os
     import openai
+    os.environ["OPENAI_API_KEY"] = '07d805ec4fbd484ebc923a3a41e1773d'
+    OPENAI_API_KEY = '07d805ec4fbd484ebc923a3a41e1773d'
+    openai.api_type = "azure"
+    openai.api_base = 'https://hebsum-itaim-uks.openai.azure.com/'
+    openai.api_version = "2023-03-15-preview"
+    openai.api_key = '07d805ec4fbd484ebc923a3a41e1773d'
     def get_entities_chatGPT(final_prompt):
         response = openai.ChatCompletion.create(
     url = "https://api.together.xyz/v1/chat/completions"
     # Define your Together API key
+    together_api_key = "851cfc39f3d7a246a2342259f5f6fbba4721c6002123365fba2254c9c9c424ad"  # Replace with your actual API key
     # Define the request payload
     payload = {
             instruction=instruction,
             test_example=test_example,
             zero_shot=zero_shot,
+            dataset=dataset,
             num_examples=num_examples,
             lang=str(selected_language).lower(),
             config=config,
 def recommend_config(task, lang, model_type):
+    """Return the recommended language configuration for a task as a dict.
+
+    A ``Config`` is created with ``lang`` for all four of its positional
+    arguments and then adjusted with ``config.set(...)`` according to the
+    task, the model type and the language type of ``lang``.
+
+    Args:
+        task: Compared against the ``QA``, ``NER``, ``NLI`` and
+            ``SUMMARIZATION`` constants; any other value leaves the initial
+            configuration untouched.
+        lang: Source language; passed to ``_get_language_type`` and used as
+            the value for every part that is not switched to ``'English'``.
+        model_type: Compared against ``ModelType.English.value``.
+
+    Returns:
+        The result of ``config.to_dict()``.
+    """
+    # Debug output: the inputs are printed to stdout on every call.
     print(task)
     print(model_type)
+    print(lang)
     language_type = _get_language_type(lang)
+    config = Config(lang, lang, lang, lang)
     print(language_type)
+    # NOTE(review): 'English' is capitalised below, while tasks/qa.py compares
+    # its instruction language against lowercase "english" - confirm that the
+    # value is normalised somewhere before that comparison.
     if task == QA:
         if model_type == ModelType.English.value:
+            config.set(prefix=lang, context=lang, examples=lang, output=lang)
         else:
+            config.set(prefix='English', context=lang, examples=lang, output=lang)
     if task == NER:
         if model_type == ModelType.English.value:
+            config.set(prefix=lang, context=lang, examples=lang, output=lang)
         elif language_type == LanguageType.High:
+            config.set(prefix='English', context=lang, examples=lang, output=lang)
         else:
+            config.set(prefix='English', context=lang, examples=lang, output='English')
     if task == NLI:
         if model_type == ModelType.English.value:
+            config.set(prefix=lang, context=lang, examples=lang, output=lang)
         elif language_type == LanguageType.High:
+            # ``output`` is not passed here; presumably it keeps the value
+            # given to Config(...) above - TODO confirm against Config.set.
+            config.set(prefix='English', context=lang, examples='English')
         else:
+            config.set(prefix='English', context='English', examples='English')
     if task == SUMMARIZATION:
+        config.set(context='English')
     return config.to_dict()

tasks/ner.py CHANGED Viewed

@@ -1,33 +1,56 @@
-from typing import List, Dict, Any
 from easygoogletranslate import EasyGoogleTranslate
 from langchain.prompts import PromptTemplate, FewShotPromptTemplate
-LANGUAGE_TO_GOOGLE_TRANSLATE_MARK = {
     "english": "en",
-    "bambara": "bm",
-    "ewe": "ee",
-    "hausa": "ha",
-    "igbo": "ig",
-    "kinyarwanda": "rw",
-    "chichewa": "ny",
-    "twi": "ak",
-    "yoruba": "yo",
-    "slovak": "sk",
-    "serbian": "sr",
-    "swedish": "sv",
     "vietnamese": "vi",
     "italian": "it",
-    "portuguese": "pt",
-    "chinese": "zh",
-    "english": "en",
-    "french": "fr"
-}
-LANGAUGE_TO_PREFIX = {
     "bambara": "bam",
     "ewe": "ewe",
     "fon": "fon",
@@ -58,7 +81,7 @@ def _translate_instruction(basic_instruction: str, target_language: str) -> str:
     return translator.translate(basic_instruction)
-def create_instruction(lang: str, expected_output: str):
     basic_instruction = f"""You are an NLP assistant whose
                             purpose is to perform Named Entity Recognition
                             (NER). You will need to give each entity a tag, from the following:
@@ -69,11 +92,90 @@ def create_instruction(lang: str, expected_output: str):
                             The entities should be in {expected_output} language"""
     return (
-        basic_instruction
         if lang == "english"
         else _translate_instruction(basic_instruction, target_language=lang)
     )
 def construct_prompt(
     instruction: str,
     test_example: dict,
@@ -84,20 +186,21 @@ def construct_prompt(
     config: Dict[str, str],
 ):
     if not instruction:
-        print(lang)
-        instruction = create_instruction(lang, config['prefix'])
     example_prompt = PromptTemplate(
-        input_variables=["summary", "text"], template="Text: {text}\nSummary: {summary}"
     )
-    zero_shot_template = f"""{instruction}""" + "\n Input: {text} " ""
-    test_data = load_xlsum_data(lang=lang, split="test", limit=100)
-    print(test_data)
-    print(num_examples)
-    print(lang)
     ic_examples = []
     if not zero_shot:
@@ -121,12 +224,10 @@ def construct_prompt(
         else PromptTemplate(input_variables=["text"], template=zero_shot_template)
     )
-    print("lang", lang)
-    print(config["input"] , lang)
     if config["input"] != lang:
         test_example = _translate_example(
             example=test_example, src_language=lang, target_language=config["input"]
         )
-    print("test_example", prompt)
-    return prompt.format(text=test_example["text"])

+from typing import List, Dict, Any, Union
+import numpy as np
+from datasets import load_dataset, Dataset
 from easygoogletranslate import EasyGoogleTranslate
 from langchain.prompts import PromptTemplate, FewShotPromptTemplate
+LANGAUGE_TO_PREFIX = {
+    "chinese_simplified": "zh-CN",
+    "french": "fr",
+    "portuguese": "pt",
     "english": "en",
+    "arabic": "ar",
+    "hindi": "hi",
+    "indonesian": "id",
+    "amharic": "am",
+    "bengali": "bn",
+    "burmese": "my",
+    "uzbek": "uz",
+    "nepali": "ne",
+    "japanese": "ja",
+    "spanish": "es",
+    "turkish": "tr",
+    "persian": "fa",
+    "azerbaijani": "az",
+    "korean": "ko",
+    "hebrew": "he",
+    "telugu": "te",
+    "german": "de",
+    "greek": "el",
+    "tamil": "ta",
+    "assamese": "as",
     "vietnamese": "vi",
+    "russian": "ru",
+    "romanian": "ro",
+    "malayalam": "ml",
+    "swahili": "sw",
+    "bulgarian": "bg",
+    "thai": "th",
+    "urdu": "ur",
     "italian": "it",
+    "polish": "pl",
+    "dutch": "nl",
+    "swedish": "sv",
+    "danish": "da",
+    "norwegian": "no",
+    "finnish": "fi",
+    "hungarian": "hu",
+    "czech": "cs",
+    "slovak": "sk",
+    "ukrainian": "uk",
     "bambara": "bam",
     "ewe": "ewe",
     "fon": "fon",
     return translator.translate(basic_instruction)
+def create_instruction(lang: str, instruction_language: str, expected_output: str):
+    """Build the NER instruction text, translated unless ``lang`` is English.
+
+    Args:
+        lang: Language of the task; compared against ``"english"`` and used
+            as the translation target otherwise.
+        instruction_language: Accepted for interface compatibility; it does
+            not select the returned text in this function.
+        expected_output: Language name interpolated into the instruction.
+
+    Returns:
+        The English instruction when ``lang`` is ``"english"``, otherwise the
+        result of ``_translate_instruction`` for ``lang``.
+    """
     basic_instruction = f"""You are an NLP assistant whose
                             purpose is to perform Named Entity Recognition
                             (NER). You will need to give each entity a tag, from the following:
                             The entities should be in {expected_output} language"""
     return (
+        # Return the instruction itself; returning ``instruction_language``
+        # here handed callers a bare language name instead of a prompt.
+        basic_instruction
         if lang == "english"
         else _translate_instruction(basic_instruction, target_language=lang)
     )
+def load_wikiann_dataset(lang, split, limit):
+    """Load up to ``limit`` leading rows of one split of the WikiANN dataset.
+
+    Args:
+        lang: Language name used as a key into ``LANGAUGE_TO_PREFIX``.
+        split: Name of the split to take from the loaded dataset.
+        limit: Maximum number of rows to keep.
+
+    Returns:
+        The first ``min(limit, len(split))`` rows of the requested split.
+
+    Raises:
+        KeyError: If ``lang`` has no entry in ``LANGAUGE_TO_PREFIX``.
+    """
+    dataset = load_dataset("wikiann", LANGAUGE_TO_PREFIX[lang])[split]
+    # Clamp the request: selecting indices past the end of a short split
+    # raises, and the caller reports any failure as an unsupported language.
+    return dataset.select(range(min(limit, len(dataset))))
+def _translate_example(
+        example: Dict[str, str], src_language: str, target_language: str
+):
+    """Translate the ``tokens`` and ``ner_tags`` fields of an NER example.
+
+    Args:
+        example: Mapping that provides ``tokens`` and ``ner_tags``; each value
+            is converted with ``str`` before translation.
+        src_language: Key into ``LANGAUGE_TO_PREFIX`` for the source language.
+        target_language: Key into ``LANGAUGE_TO_PREFIX`` for the target language.
+
+    Returns:
+        A new dict holding the translated ``tokens`` and ``ner_tags``.
+    """
+    source_code = LANGAUGE_TO_PREFIX[src_language]
+    target_code = LANGAUGE_TO_PREFIX[target_language]
+    translator = EasyGoogleTranslate(
+        source_language=source_code,
+        target_language=target_code,
+        timeout=30,
+    )
+    translated = {}
+    # Same field order as before: tokens first, then the tags.
+    for field in ("tokens", "ner_tags"):
+        translated[field] = translator.translate(str(example[field]))
+    return translated
+def choose_few_shot_examples(
+        train_dataset: Dataset,
+        few_shot_size: int,
+        context: List[str],
+        selection_criteria: str,
+        lang: str,
+) -> List[Dict[str, Union[str, int]]]:
+    """Select few-shot examples and translate them where requested.
+
+    Args:
+        train_dataset (Dataset): Dataset to draw from; each row is read
+            through its ``tokens`` and ``spans`` fields.
+        few_shot_size (int): Number of few-shot examples to draw.
+        context (List[str]): Target language of each example, by position.
+        selection_criteria (str): How to select few-shot examples. Choices: [random, first_k]
+        lang (str): Language the dataset rows are written in.
+
+    Returns:
+        List[Dict[str, Union[str, int]]]: One ``tokens``/``ner_tags`` dict per
+        (example, context language) pair.
+
+    Raises:
+        ValueError: If ``selection_criteria`` is not a supported choice.
+    """
+    available = len(train_dataset)
+    # Never index past the end of a dataset smaller than the request.
+    sample_size = min(few_shot_size, available)
+    if selection_criteria == "first_k":
+        example_idxs = list(range(sample_size))
+    elif selection_criteria == "random":
+        # Sample without replacement so the same example is not shown twice.
+        example_idxs = (
+            np.random.choice(available, size=sample_size, replace=False)
+            .astype(int)
+            .tolist()
+        )
+    else:
+        raise ValueError(
+            f"Unknown selection_criteria {selection_criteria!r}; "
+            "expected 'first_k' or 'random'"
+        )
+    rows = [train_dataset[idx] for idx in example_idxs]
+    ic_examples = [
+        {
+            "tokens": ' '.join(row['tokens']),
+            "ner_tags": row['spans'],
+        }
+        for row in rows
+    ]
+    # zip stops at the shorter sequence, so a context list longer than the
+    # drawn examples no longer raises IndexError.
+    return [
+        example
+        if ic_language == lang
+        else _translate_example(
+            example=example,
+            src_language=lang,
+            target_language=ic_language,
+        )
+        for example, ic_language in zip(ic_examples, context)
+    ]
 def construct_prompt(
     instruction: str,
     test_example: dict,
     config: Dict[str, str],
 ):
     if not instruction:
+        instruction = create_instruction(lang, config['prefix'], config['output'])
     example_prompt = PromptTemplate(
+        input_variables=["tokens", "ner_tags"],
+        template="Sentence: {tokens}\nNer Tags: {ner_tags}",
     )
+    zero_shot_template = f"""{instruction}""" + "\n Sentence: {text} " ""
+    try:
+        test_data = load_wikiann_dataset(lang=lang, split="test", limit=500)
+    except Exception as e:
+        raise KeyError(f"{lang} is not supported in 'wikiAnn' dataset, choose supported language in few-shot")
     ic_examples = []
     if not zero_shot:
         else PromptTemplate(input_variables=["text"], template=zero_shot_template)
     )
     if config["input"] != lang:
         test_example = _translate_example(
             example=test_example, src_language=lang, target_language=config["input"]
         )
+    print(test_example)
+    return prompt.format(text=test_example["tokens"])

tasks/nli.py CHANGED Viewed

@@ -432,15 +432,14 @@ def process_test_example(
 def construct_prompt(
-    instruction: str,
-    test_example: dict,
-    zero_shot: bool,
-    num_examples: int,
-    lang: str,
-    config: Dict[str, str],
-    dataset_name: str = 'xnli'
 ):
     if not instruction:
         print(lang)
         instruction = create_instruction(lang)
@@ -451,17 +450,16 @@ def construct_prompt(
     )
     zero_shot_template = (
-        f"""{instruction}""" + "\n Hypothesis: {hypothesis} + \n  Premise: {premise}" ""
     )
-    test_data = load_xnli_dataset(dataset_name, lang, split="test", limit=100)
-    print(test_data)
-    print(num_examples)
-    print(lang)
     ic_examples = []
     if not zero_shot:
         ic_examples = choose_few_shot_examples(
             train_dataset=test_data,
             few_shot_size=num_examples,
@@ -485,12 +483,11 @@ def construct_prompt(
     )
     print("lang", lang)
-    print(config["input"] , lang)
     if config["input"] != lang:
         test_example = _translate_example(
             example=test_example, src_language=lang, target_language=config["input"]
         )
     return prompt.format(
-        hypothesis=test_example["hypothesis"], premise=test_example["premise"]
-    )

 def construct_prompt(
+        instruction: str,
+        test_example: dict,
+        zero_shot: bool,
+        num_examples: int,
+        lang: str,
+        config: Dict[str, str],
+        dataset_name: str = 'xnli'
 ):
     if not instruction:
         print(lang)
         instruction = create_instruction(lang)
     )
     zero_shot_template = (
+            f"""{instruction}""" + "\n Hypothesis: {hypothesis} + \n  Premise: {premise}" ""
     )
+    if not zero_shot:
+        try:
+            test_data = load_xnli_dataset(dataset_name, lang, split="test", limit=100)
+        except KeyError as e:
+            raise KeyError(f"{lang} is not supported in {dataset_name} dataset, choose supported language in few-shot")
     ic_examples = []
     if not zero_shot:
         ic_examples = choose_few_shot_examples(
             train_dataset=test_data,
             few_shot_size=num_examples,
     )
     print("lang", lang)
+    print(config["input"], lang)
     if config["input"] != lang:
         test_example = _translate_example(
             example=test_example, src_language=lang, target_language=config["input"]
         )
     return prompt.format(
+        hypothesis=test_example["hypothesis"], premise=test_example["premise"])

tasks/qa.py CHANGED Viewed

@@ -29,7 +29,7 @@ from yaml.loader import SafeLoader
 def gemini_completion(prompt):
     # Define the endpoint URL
-    genai.configure(api_key="")
     model = genai.GenerativeModel("models/gemini-1.0-pro-latest")
     return model.generate_content(prompt).text
@@ -41,6 +41,14 @@ def gemini_completion(prompt):
 # model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, torch_dtype="auto", device_map="auto")
 # model.to("cuda:04")
 def get_entities_gpt3_long(prompt):
     response = openai.ChatCompletion.create(
@@ -58,8 +66,12 @@ def gpt3x_completion(
 ) -> str:
     import os
     import openai
-    os.environ["OPENAI_API_KEY"] = ''
     openai.api_type = "azure"
     def get_entities_chatGPT(final_prompt):
         response = openai.ChatCompletion.create(
@@ -84,7 +96,7 @@ def mixtral_completion(prompt):
     url = "https://api.together.xyz/v1/chat/completions"
     # Define your Together API key
-    together_api_key = ""  # Replace with your actual API key
     # Define the request payload
     payload = {
@@ -177,21 +189,43 @@ LANGUAGE_TO_SUFFIX = {
     "indonesian": "id",
     "amharic": "am",
     "bengali": "bn",
-    "telugu": "te",
     "burmese": "my",
     "german": "de",
     "greek": "el",
     "tamil": "ta",
     "assamese": "as",
-    "hindi": "hi",
     "vietnamese": "vi",
     "russian": "ru",
-    "telugu": "te",
     "romanian": "ro",
     "malayalam": "ml",
-    "persian": "fa",
 }
 PARAMS = NewType("PARAMS", Dict[str, Any])
@@ -337,7 +371,7 @@ def _translate_prediction_to_output_language(
     return translator.translate(prediction)
-def create_instruction(lang: str, expected_output: str):
     basic_instruction = (
         "Answer to the <Question> below, based only to the given <Context>, Follow these instructions: \n "
         "1. The answer should include only words from the given context \n "
@@ -347,7 +381,7 @@ def create_instruction(lang: str, expected_output: str):
     )
     return (
         basic_instruction
-        if expected_output == "english"
         else _translate_instruction(basic_instruction, target_language=lang)
     )
@@ -714,7 +748,7 @@ def construct_prompt(
     dataset_name: str = 'xquad'
 ):
     if not instruction:
-        instruction = create_instruction(lang, config['prefix'])
     example_prompt = PromptTemplate(
         input_variables=["context", "question", "answers"],
@@ -724,12 +758,13 @@ def construct_prompt(
     zero_shot_template = (
             f"""{instruction}""" + " \n <Context>: {context}  \n <Question>: {question} " ""
     )
-    test_data = load_qa_dataset(dataset_name = dataset_name, lang=lang, split="test", limit=100)
-    print(test_data)
-    print(num_examples)
-    print(lang)
     ic_examples = []
     if not zero_shot:

 def gemini_completion(prompt):
+    """Send ``prompt`` to the Gemini model and return the reply text.
+
+    The API key is read from the ``GEMINI_API_KEY`` environment variable so
+    that no credential is stored in the repository. Any key that was ever
+    committed in this function must be treated as leaked and rotated.
+
+    Args:
+        prompt: Prompt passed to ``generate_content``.
+
+    Returns:
+        The ``text`` attribute of the generated response.
+
+    Raises:
+        RuntimeError: If ``GEMINI_API_KEY`` is not set.
+    """
+    import os  # local import keeps this block self-contained
+
+    api_key = os.environ.get("GEMINI_API_KEY")
+    if not api_key:
+        raise RuntimeError("GEMINI_API_KEY environment variable is not set")
+    genai.configure(api_key=api_key)
     model = genai.GenerativeModel("models/gemini-1.0-pro-latest")
     return model.generate_content(prompt).text
 # model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, torch_dtype="auto", device_map="auto")
 # model.to("cuda:04")
+# Credentials come from the environment (for example a Space secret), never
+# from source control. Any key that was committed here must be rotated.
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
+openai.api_key = OPENAI_API_KEY
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 def get_entities_gpt3_long(prompt):
     response = openai.ChatCompletion.create(
 ) -> str:
     import os
     import openai
+    os.environ["OPENAI_API_KEY"] = '07d805ec4fbd484ebc923a3a41e1773d'
+    OPENAI_API_KEY = '07d805ec4fbd484ebc923a3a41e1773d'
     openai.api_type = "azure"
+    openai.api_base = 'https://hebsum-itaim-uks.openai.azure.com/'
+    openai.api_version = "2023-03-15-preview"
+    openai.api_key = '07d805ec4fbd484ebc923a3a41e1773d'
     def get_entities_chatGPT(final_prompt):
         response = openai.ChatCompletion.create(
     url = "https://api.together.xyz/v1/chat/completions"
     # Define your Together API key
+    together_api_key = "851cfc39f3d7a246a2342259f5f6fbba4721c6002123365fba2254c9c9c424ad"  # Replace with your actual API key
     # Define the request payload
     payload = {
     "indonesian": "id",
     "amharic": "am",
     "bengali": "bn",
     "burmese": "my",
+    "uzbek": "uz",
+    "nepali": "ne",
+    "japanese": "ja",
+    "spanish": "es",
+    "turkish": "tr",
+    "persian": "fa",
+    "azerbaijani": "az",
+    "korean": "ko",
+    "hebrew": "he",
+    "telugu": "te",
     "german": "de",
     "greek": "el",
     "tamil": "ta",
     "assamese": "as",
     "vietnamese": "vi",
     "russian": "ru",
     "romanian": "ro",
     "malayalam": "ml",
+    "swahili": "sw",
+    "bulgarian": "bg",
+    "thai": "th",
+    "urdu": "ur",
+    "italian": "it",
+    "polish": "pl",
+    "dutch": "nl",
+    "swedish": "sv",
+    "danish": "da",
+    "norwegian": "no",
+    "finnish": "fi",
+    "hungarian": "hu",
+    "czech": "cs",
+    "slovak": "sk",
+    "ukrainian": "uk"
 }
 PARAMS = NewType("PARAMS", Dict[str, Any])
     return translator.translate(prediction)
+def create_instruction(lang: str, instruction_language: str, expected_output):
     basic_instruction = (
         "Answer to the <Question> below, based only to the given <Context>, Follow these instructions: \n "
         "1. The answer should include only words from the given context \n "
     )
     return (
         basic_instruction
+        if instruction_language == "english"
         else _translate_instruction(basic_instruction, target_language=lang)
     )
     dataset_name: str = 'xquad'
 ):
     if not instruction:
+        instruction = create_instruction(lang, config['prefix'], config['output'])
     example_prompt = PromptTemplate(
         input_variables=["context", "question", "answers"],
     zero_shot_template = (
             f"""{instruction}""" + " \n <Context>: {context}  \n <Question>: {question} " ""
     )
+    if not zero_shot:
+        try:
+            test_data = load_qa_dataset(dataset_name = dataset_name, lang=lang, split="test", limit=100)
+        except Exception as e:
+            raise KeyError(f"{lang} is not supported in {dataset_name}")
     ic_examples = []
     if not zero_shot:

tasks/summarization.py CHANGED Viewed

@@ -1,9 +1,10 @@
-from typing import List, Dict, Optional, Union
 import numpy as np
 from datasets import Dataset, load_dataset
 from easygoogletranslate import EasyGoogleTranslate
 from langchain.prompts import PromptTemplate, FewShotPromptTemplate
-from iso639 import to_iso639_1
 LANGUAGE_TO_SUFFIX = {
     "chinese_simplified": "zh-CN",
     "french": "fr",
@@ -24,6 +25,30 @@ LANGUAGE_TO_SUFFIX = {
     "azerbaijani": "az",
     "korean": "ko",
     "hebrew": "he",
 }
@@ -56,15 +81,15 @@ def choose_few_shot_examples(
 def _translate_instruction(basic_instruction: str, target_language: str) -> str:
     translator = EasyGoogleTranslate(
         source_language="en",
-        target_language=to_iso639_1(target_language),
         timeout=50,
     )
     return translator.translate(basic_instruction)
 def _translate_example(example: Dict[str, str], src_language: str, target_language: str):
-    translator = EasyGoogleTranslate(source_language=to_iso639_1(str(src_language).capitalize()),
-                                     target_language=to_iso639_1(str(target_language).capitalize()),
                                      timeout=30)
     try:
         return {'text': translator.translate(example['text']), 'summary': ''}

+from typing import List, Dict, Union
 import numpy as np
 from datasets import Dataset, load_dataset
 from easygoogletranslate import EasyGoogleTranslate
 from langchain.prompts import PromptTemplate, FewShotPromptTemplate
 LANGUAGE_TO_SUFFIX = {
     "chinese_simplified": "zh-CN",
     "french": "fr",
     "azerbaijani": "az",
     "korean": "ko",
     "hebrew": "he",
+    "telugu": "te",
+    "german": "de",
+    "greek": "el",
+    "tamil": "ta",
+    "assamese": "as",
+    "vietnamese": "vi",
+    "russian": "ru",
+    "romanian": "ro",
+    "malayalam": "ml",
+    "swahili": "sw",
+    "bulgarian": "bg",
+    "thai": "th",
+    "urdu": "ur",
+    "italian": "it",
+    "polish": "pl",
+    "dutch": "nl",
+    "swedish": "sv",
+    "danish": "da",
+    "norwegian": "no",
+    "finnish": "fi",
+    "hungarian": "hu",
+    "czech": "cs",
+    "slovak": "sk",
+    "ukrainian": "uk"
 }
 def _translate_instruction(basic_instruction: str, target_language: str) -> str:
+    """Translate an English instruction into ``target_language``.
+
+    Args:
+        basic_instruction: Instruction text, written in English.
+        target_language: Key into ``LANGUAGE_TO_SUFFIX`` naming the language
+            to translate into.
+
+    Returns:
+        The translated instruction.
+
+    Raises:
+        KeyError: If ``target_language`` has no entry in ``LANGUAGE_TO_SUFFIX``.
+    """
+    try:
+        target_code = LANGUAGE_TO_SUFFIX[target_language]
+    except KeyError:
+        # Same exception type as before, but with a message the user can act on.
+        raise KeyError(
+            f"{target_language} is not supported for instruction translation"
+        ) from None
     translator = EasyGoogleTranslate(
         source_language="en",
+        target_language=target_code,
         timeout=50,
     )
     return translator.translate(basic_instruction)
 def _translate_example(example: Dict[str, str], src_language: str, target_language: str):
+    translator = EasyGoogleTranslate(source_language=LANGUAGE_TO_SUFFIX[src_language],
+                                     target_language=LANGUAGE_TO_SUFFIX[target_language],
                                      timeout=30)
     try:
         return {'text': translator.translate(example['text']), 'summary': ''}