Spaces:
Sleeping
Sleeping
| from evaluate import load | |
| import pandas as pd | |
| import string | |
# Name of the metric to fetch and location of the CSV file to be scored.
METRIC_NAME = "sari"
RESULTS_CSV = "MT0_xxl_results/result_pt_80p"

# Obtain the SARI metric object through the `evaluate` library.
sari = load(METRIC_NAME)

# Read the results CSV into a DataFrame.
df = pd.read_csv(RESULTS_CSV)
# Deletion table for ASCII punctuation, built once so that every call removes
# all of it in a single pass instead of one .replace() per character.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def process_sentence(sentence):
    """Normalize a sentence before it is scored.

    Keeps only the first line of the text, lowercases it, removes every
    character in ``string.punctuation``, and drops one trailing danda
    ('।') together with any whitespace that preceded it.

    Args:
        sentence: Raw value to normalize. Anything that is not a ``str``
            (for example a missing value) is treated as an empty sentence.

    Returns:
        The normalized sentence, or ``""`` when the input is not a string.
    """
    if not isinstance(sentence, str):
        return ""

    # Only the first line is kept; anything after a newline is discarded.
    first_line = sentence.split('\n')[0]
    cleaned = first_line.strip().lower().translate(_PUNCTUATION_TABLE).strip()

    # The danda is not part of string.punctuation, so it is handled here.
    # Strip again afterwards so "text ।" does not keep a trailing space.
    if cleaned.endswith('।'):
        cleaned = cleaned[:-1].rstrip()
    return cleaned
# Normalize the source sentences and the model predictions.
original = list(map(process_sentence, df['original']))
predicted = list(map(process_sentence, df['pred_label']))

# Columns that hold the reference sentences; change these names to match
# the reference columns of the CSV being scored.
reference_columns = ["label1", "label2", "label3", "label4"]

# One list of normalized references per row, in column order.
references = [
    [process_sentence(row[name]) for name in reference_columns]
    for _, row in df.iterrows()
]

# Compute the SARI score and print it.
results = {
    'sari': sari.compute(sources=original, predictions=predicted, references=references),
}
print(results)