Spaces:
Runtime error
Runtime error
| import enum | |
| import pandas as pd | |
| from tasks import ner, nli, qa, summarization | |
class LanguageType(enum.Enum):
    """Two-way bucket for a language: ``Low`` or ``High``.

    Each member's value is the same string as its name.
    """

    Low = "Low"
    High = "High"
class ModelType(enum.Enum):
    """Category of the model being prompted: ``English`` or ``Multilingual``.

    Each member's value is the same string as its name.
    """

    English = "English"
    Multilingual = "Multilingual"
# Task identifiers. Incoming ``task`` arguments are compared against these
# strings with ``==`` to choose the task-specific code path.
QA = "QA"
SUMMARIZATION = "Summarization"
NLI = "NLI"
NER = "NER"
def construct_generic_prompt(
    task,
    instruction,
    test_example,
    zero_shot,
    num_examples,
    selected_language,
    dataset,
    config,
):
    """Build a prompt by delegating to the prompt builder of the given task.

    Args:
        task: Task identifier, compared against ``SUMMARIZATION``, ``NER``
            and ``QA``; any other value is routed to the NLI builder.
        instruction: Forwarded unchanged to the task's ``construct_prompt``.
        test_example: Forwarded unchanged to the task's ``construct_prompt``.
        zero_shot: Forwarded unchanged to the task's ``construct_prompt``.
        num_examples: Forwarded unchanged to the task's ``construct_prompt``.
        selected_language: Converted with ``str(...).lower()`` and passed on
            as ``lang``.
        dataset: Forwarded only to the summarization and NER builders.
        config: Forwarded unchanged to the task's ``construct_prompt``.

    Returns:
        Whatever the selected ``construct_prompt`` function returns.
    """
    print(task)

    # Arguments every task-specific builder receives.
    shared_kwargs = dict(
        instruction=instruction,
        test_example=test_example,
        zero_shot=zero_shot,
        num_examples=num_examples,
        lang=str(selected_language).lower(),
        config=config,
    )

    # Only summarization and NER are additionally given the dataset.
    if task == SUMMARIZATION:
        return summarization.construct_prompt(dataset=dataset, **shared_kwargs)
    if task == NER:
        return ner.construct_prompt(dataset=dataset, **shared_kwargs)
    if task == QA:
        return qa.construct_prompt(**shared_kwargs)

    # Every remaining task value is handled by the NLI builder.
    return nli.construct_prompt(**shared_kwargs)
def _get_language_type(
    language: str,
    *,
    csv_path: str = "utils/languages_by_word_count.csv",
    threshold: int = 150276400,
) -> "LanguageType":
    """Classify ``language`` as low- or high-resource from its word count.

    The word count is read from the ``"number of words"`` column of the first
    row in the CSV whose ``"Language"`` column equals ``language``.

    Args:
        language: Value to look up in the CSV's ``"Language"`` column.
        csv_path: Location of the word-count CSV.
        threshold: Word counts strictly below this value map to
            ``LanguageType.Low``; all others map to ``LanguageType.High``.

    Returns:
        ``LanguageType.Low`` or ``LanguageType.High``.

    Raises:
        IndexError: If no row of the CSV matches ``language``.
    """
    df = pd.read_csv(csv_path)
    word_counts = df.loc[df["Language"] == language, "number of words"]

    # ``.iloc[0]`` on an empty selection raises a bare "indexer is
    # out-of-bounds" IndexError; keep the exception type but say what is wrong.
    if word_counts.empty:
        raise IndexError(f"language {language!r} not found in {csv_path!r}")

    number_of_words = word_counts.iloc[0]
    print(number_of_words)
    return LanguageType.Low if number_of_words < threshold else LanguageType.High
class Config:
    """Holds one setting for each of four prompt parts.

    The parts are ``prefix``, ``context``, ``examples`` and ``output``; each
    defaults to the string ``"source"``.
    """

    def __init__(
        self, prefix="source", context="source", examples="source", output="source"
    ):
        self.prefix, self.context = prefix, context
        self.examples, self.output = examples, output

    def set(self, prefix=None, context=None, examples=None, output=None):
        """Overwrite the given fields; falsy arguments leave a field as-is."""
        requested = (
            ("prefix", prefix),
            ("context", context),
            ("examples", examples),
            ("output", output),
        )
        for attribute, value in requested:
            if value:
                setattr(self, attribute, value)

    def to_dict(self):
        """Return the settings as a dict; ``prefix`` is keyed ``"instruction"``."""
        return dict(
            instruction=self.prefix,
            context=self.context,
            examples=self.examples,
            output=self.output,
        )
def recommend_config(task, lang, model_type):
    """Recommend which language to use for each part of a prompt.

    Every part starts out as ``lang``; selected parts are then switched to
    ``"English"`` depending on the task, the model type and the language
    type of ``lang``.

    Args:
        task: Task identifier, compared against ``QA``, ``NER``, ``NLI`` and
            ``SUMMARIZATION``; any other value leaves all parts as ``lang``.
        lang: Language name; also passed to ``_get_language_type``.
        model_type: Compared against ``ModelType.English.value``.

    Returns:
        The dict produced by ``Config.to_dict()``, which is also printed.
    """
    # Looked up for every task, although only the NER and NLI rules read it.
    language_type = _get_language_type(lang)
    config = Config(prefix=lang, context=lang, examples=lang, output=lang)

    english_model = model_type == ModelType.English.value
    high_resource = language_type == LanguageType.High

    # Collect only the parts that deviate from ``lang``. For QA, NER and NLI
    # an English model keeps every part in ``lang``.
    if task == QA and not english_model:
        overrides = {"prefix": "English"}
    elif task == NER and not english_model:
        overrides = {"prefix": "English"}
        if not high_resource:
            overrides["output"] = "English"
    elif task == NLI and not english_model:
        overrides = {"prefix": "English", "examples": "English"}
        if not high_resource:
            overrides["context"] = "English"
    elif task == SUMMARIZATION:
        # Applies regardless of model type or language type.
        overrides = {"context": "English"}
    else:
        overrides = {}

    config.set(**overrides)

    recommendation = config.to_dict()
    print(recommendation)
    return recommendation