Spaces:
Runtime error
Runtime error
| from typing import List, Dict, Any | |
| from easygoogletranslate import EasyGoogleTranslate | |
| from langchain.prompts import PromptTemplate, FewShotPromptTemplate | |
# Maps a lowercase language name to the language code handed to Google
# Translate. Each language appears exactly once (a duplicated "english"
# entry was removed; the resulting mapping is unchanged).
LANGUAGE_TO_GOOGLE_TRANSLATE_MARK = {
    "english": "en",
    "bambara": "bm",
    "ewe": "ee",
    "hausa": "ha",
    "igbo": "ig",
    "kinyarwanda": "rw",
    "chichewa": "ny",
    "twi": "ak",
    "yoruba": "yo",
    "slovak": "sk",
    "serbian": "sr",
    "swedish": "sv",
    "vietnamese": "vi",
    "italian": "it",
    "portuguese": "pt",
    "chinese": "zh",
    "french": "fr",
}
# Maps a lowercase language name to a short language prefix.
# NOTE: the constant name is misspelled ("LANGAUGE"); it is kept as-is
# because _translate_instruction refers to it by this exact name.
# NOTE(review): presumably these prefixes select dataset/config names --
# confirm against the callers.
LANGAUGE_TO_PREFIX = {
    # Languages identified by a three-letter code.
    "bambara": "bam",
    "ewe": "ewe",
    "fon": "fon",
    "hausa": "hau",
    "igbo": "ibo",
    "kinyarwanda": "kin",
    "chichewa": "nya",
    "twi": "twi",
    "yoruba": "yor",
    # Languages identified by a two-letter code.
    "slovak": "sk",
    "serbian": "sr",
    "swedish": "sv",
    "vietnamese": "vi",
    "italian": "it",
    "portuguese": "pt",
    "chinese": "zh",
    "english": "en",
    "french": "fr",
}
def _translate_instruction(basic_instruction: str, target_language: str) -> str:
    """Translate an English instruction into ``target_language``.

    Args:
        basic_instruction: The instruction text, written in English.
        target_language: Lowercase language name (e.g. ``"yoruba"``).

    Returns:
        The text returned by ``EasyGoogleTranslate.translate``.

    Raises:
        KeyError: If ``target_language`` is in neither language table.
    """
    # The translator needs Google Translate language codes ("yo", "ha", ...),
    # which live in LANGUAGE_TO_GOOGLE_TRANSLATE_MARK. LANGAUGE_TO_PREFIX holds
    # different codes for several languages ("yor", "hau", ...), so it is only
    # used as a fallback for languages missing from the Google table, which
    # keeps the previous behaviour for those.
    google_code = LANGUAGE_TO_GOOGLE_TRANSLATE_MARK.get(target_language)
    if google_code is None:
        google_code = LANGAUGE_TO_PREFIX[target_language]

    translator = EasyGoogleTranslate(
        source_language="en",
        target_language=google_code,
        timeout=10,
    )
    return translator.translate(basic_instruction)
def create_instruction(lang: str, expected_output: str):
    """Build the NER task instruction in the requested language.

    Args:
        lang: Lowercase name of the language the instruction should be
            written in. ``"english"`` returns the template untranslated.
        expected_output: Language name interpolated into the final sentence
            of the instruction.

    Returns:
        The English instruction when ``lang == "english"``; otherwise the
        result of ``_translate_instruction`` for ``lang``.
    """
    english_instruction = f"""You are an NLP assistant whose
purpose is to perform Named Entity Recognition
(NER). You will need to give each entity a tag, from the following:
PER means a person, ORG means organization.
LOC means a location entity.
The output should be a list of tuples of the format:
['Tag: Entity', 'Tag: Entity'] for each entity in the sentence.
The entities should be in {expected_output} language"""

    # English needs no translation round-trip.
    if lang == "english":
        return english_instruction
    return _translate_instruction(english_instruction, target_language=lang)
def construct_prompt(
    instruction: str,
    test_example: dict,
    zero_shot: bool,
    dataset: str,
    num_examples: int,
    lang: str,
    config: Dict[str, str],
):
    """Render the final prompt string for a single test example.

    Args:
        instruction: Instruction placed at the top of the prompt. When empty,
            one is generated with ``create_instruction(lang, config["prefix"])``.
        test_example: Mapping with at least a ``"text"`` entry.
        zero_shot: If true, the prompt holds only the instruction and the
            input text; otherwise in-context examples are added.
        dataset: Currently unused; kept for interface compatibility.
        num_examples: Number of in-context examples for few-shot prompts.
        lang: Lowercase language name of the test example.
        config: Reads the keys ``"prefix"`` (only when ``instruction`` is
            empty), ``"context"`` (few-shot only) and ``"input"``.

    Returns:
        The formatted prompt string.
    """
    import logging

    logger = logging.getLogger(__name__)
    logger.debug(
        "construct_prompt: lang=%s input=%s zero_shot=%s num_examples=%s",
        lang,
        config["input"],
        zero_shot,
        num_examples,
    )

    if not instruction:
        instruction = create_instruction(lang, config["prefix"])

    if zero_shot:
        # No in-context examples are needed, so no dataset is loaded here.
        prompt = PromptTemplate(
            input_variables=["text"],
            template=f"{instruction}" + "\n Input: {text} ",
        )
    else:
        example_prompt = PromptTemplate(
            input_variables=["summary", "text"],
            template="Text: {text}\nSummary: {summary}",
        )
        # NOTE(review): the few-shot pool comes from the *test* split and is
        # passed as ``train_dataset`` -- confirm this cannot leak the example
        # under evaluation into its own prompt.
        few_shot_pool = load_xlsum_data(lang=lang, split="test", limit=100)
        ic_examples = choose_few_shot_examples(
            train_dataset=few_shot_pool,
            few_shot_size=num_examples,
            context=[config["context"]] * num_examples,
            selection_criteria="random",
            lang=lang,
        )
        # NOTE(review): the examples use a "Text:/Summary:" layout while the
        # suffix uses "<Text>:" -- confirm the mismatch is intentional.
        prompt = FewShotPromptTemplate(
            examples=ic_examples,
            prefix=instruction,
            example_prompt=example_prompt,
            suffix="<Text>: {text}",
            input_variables=["text"],
        )

    # Translate the test example when the configured input language differs
    # from the language the example is written in.
    if config["input"] != lang:
        test_example = _translate_example(
            example=test_example,
            src_language=lang,
            target_language=config["input"],
        )

    return prompt.format(text=test_example["text"])