# Copyright 2020 The HuggingFace Evaluate Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""IndicGLUE benchmark metric."""
import datasets
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics import f1_score

import evaluate

_CITATION = """\
@inproceedings{kakwani2020indicnlpsuite,
    title={{IndicNLPSuite: Monolingual Corpora, Evaluation Benchmarks and Pre-trained Multilingual Language Models for Indian Languages}},
    author={Divyanshu Kakwani and Anoop Kunchukuttan and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar},
    year={2020},
    booktitle={Findings of EMNLP},
}
"""

_DESCRIPTION = """\
IndicGLUE is a natural language understanding benchmark for Indian languages. It contains a wide
variety of tasks and covers 11 major Indian languages: as, bn, gu, hi, kn, ml, mr, or, pa, ta, te.
"""

_KWARGS_DESCRIPTION = """
Compute the IndicGLUE evaluation metric associated with each IndicGLUE dataset.
Args:
    predictions: list of predictions to score (as int64),
        except for 'cvit-mkb-clsr' where each prediction is a vector (of float32).
    references: list of ground truth labels corresponding to the predictions (as int64),
        except for 'cvit-mkb-clsr' where each reference is a vector (of float32).
Returns: depending on the IndicGLUE subset, one or several of:
    "accuracy": Accuracy
    "f1": F1 score
    "precision@10": Precision@10
Examples:

    >>> indic_glue_metric = evaluate.load('indic_glue', 'wnli')  # 'wnli' or any of ["copa", "sna", "csqa", "wstp", "inltkh", "bbca", "iitp-mr", "iitp-pr", "actsa-sc", "md"]
    >>> references = [0, 1]
    >>> predictions = [0, 1]
    >>> results = indic_glue_metric.compute(predictions=predictions, references=references)
    >>> print(results)
    {'accuracy': 1.0}

    >>> indic_glue_metric = evaluate.load('indic_glue', 'wiki-ner')
    >>> references = [0, 1]
    >>> predictions = [0, 1]
    >>> results = indic_glue_metric.compute(predictions=predictions, references=references)
    >>> print(results)
    {'accuracy': 1.0, 'f1': 1.0}

    >>> indic_glue_metric = evaluate.load('indic_glue', 'cvit-mkb-clsr')
    >>> references = [[0.5, 0.5, 0.5], [0.1, 0.2, 0.3]]
    >>> predictions = [[0.5, 0.5, 0.5], [0.1, 0.2, 0.3]]
    >>> results = indic_glue_metric.compute(predictions=predictions, references=references)
    >>> print(results)
    {'precision@10': 1.0}
"""


def simple_accuracy(preds, labels):
    return float((preds == labels).mean())


def acc_and_f1(preds, labels):
    acc = simple_accuracy(preds, labels)
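    # note: sklearn's f1_score defaults to average="binary"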
    f1 = float(f1_score(y_true=labels, y_pred=preds))
    return {
        "accuracy": acc,
        "f1": f1,
    }


def precision_at_10(en_sentvecs, in_sentvecs):
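    """Precision@10 for cross-lingual sentence retrieval: the fraction of
    English sentences whose aligned Indic sentence (same row index) is among
    the 10 nearest neighbours by cosine distance, after mean-centering."""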
    en_sentvecs = np.array(en_sentvecs)
    in_sentvecs = np.array(in_sentvecs)
    n = en_sentvecs.shape[0]

    # mean centering
    en_sentvecs = en_sentvecs - np.mean(en_sentvecs, axis=0)
    in_sentvecs = in_sentvecs - np.mean(in_sentvecs, axis=0)

    sim = cdist(en_sentvecs, in_sentvecs, "cosine")
    actual = np.array(range(n))
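    # cdist returns distances, so an ascending argsort ranks the most similar
    # sentences first; a query is a hit when its aligned index is in the top 10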
    preds = sim.argsort(axis=1)[:, :10]
    matches = np.any(preds == actual[:, None], axis=1)
    return float(matches.mean())


class IndicGlue(evaluate.Metric):
    def _info(self):
        if self.config_name not in [
            "wnli",
            "copa",
            "sna",
            "csqa",
            "wstp",
            "inltkh",
            "bbca",
            "cvit-mkb-clsr",
            "iitp-mr",
            "iitp-pr",
            "actsa-sc",
            "md",
            "wiki-ner",
        ]:
            raise KeyError(
                "You should supply a configuration name selected in "
                '["wnli", "copa", "sna", "csqa", "wstp", "inltkh", "bbca", '
                '"cvit-mkb-clsr", "iitp-mr", "iitp-pr", "actsa-sc", "md", '
                '"wiki-ner"]'
            )
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
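                    # 'cvit-mkb-clsr' compares sentence embeddings, so both
                    # columns are float vectors; every other config takes
                    # integer class labels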
| "predictions": datasets.Value("int64") | |
| if self.config_name != "cvit-mkb-clsr" | |
| else datasets.Sequence(datasets.Value("float32")), | |
| "references": datasets.Value("int64") | |
| if self.config_name != "cvit-mkb-clsr" | |
| else datasets.Sequence(datasets.Value("float32")), | |
| } | |
| ), | |
| codebase_urls=[], | |
| reference_urls=[], | |
| format="numpy" if self.config_name != "cvit-mkb-clsr" else None, | |
| ) | |

    def _compute(self, predictions, references):
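        # dispatch on config: cross-lingual retrieval is scored by precision@10,
        # 'wiki-ner' by accuracy + F1, and the remaining classification tasks
        # by accuracy alone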
        if self.config_name == "cvit-mkb-clsr":
            return {"precision@10": precision_at_10(predictions, references)}
        elif self.config_name == "wiki-ner":
            return acc_and_f1(predictions, references)
        elif self.config_name in [
            "wnli",
            "copa",
            "sna",
            "csqa",
            "wstp",
            "inltkh",
            "bbca",
            "iitp-mr",
            "iitp-pr",
            "actsa-sc",
            "md",
        ]:
            return {"accuracy": simple_accuracy(predictions, references)}
        else:
            raise KeyError(
                "You should supply a configuration name selected in "
                '["wnli", "copa", "sna", "csqa", "wstp", "inltkh", "bbca", '
                '"cvit-mkb-clsr", "iitp-mr", "iitp-pr", "actsa-sc", "md", '
                '"wiki-ner"]'
            )
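

if __name__ == "__main__":
    # Minimal usage sketch, not part of the metric itself; it assumes this
    # script is resolvable by the evaluate library as the 'indic_glue' metric.
    metric = evaluate.load("indic_glue", "wnli")
    print(metric.compute(predictions=[0, 1], references=[0, 1]))  # {'accuracy': 1.0}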