Spaces:
Sleeping
Sleeping
| import csv | |
| import string | |
| from scipy.stats import wilcoxon | |
| import numpy as np | |
| def process_sentence(sentence): | |
| if not isinstance(sentence, str): | |
| return "" | |
| sentence = sentence.split('\n')[0] | |
| sentence = sentence.strip() | |
| sentence = sentence.lower() | |
| for punctuation in string.punctuation: | |
| sentence = sentence.replace(punctuation, "") | |
| sentence = sentence.strip() | |
| if sentence and sentence[-1] == '।': | |
| sentence = sentence[:-1] | |
| return sentence | |
| # Read CSV and generate exact match scores for Prompt A | |
| with open('MT0_xxl_results/result_vi', 'r') as csvfile: | |
| reader = csv.DictReader(csvfile) | |
| scores_a = [1 if process_sentence(row['pred_label']) == process_sentence(row['label']) else 0 for row in reader] | |
| # Read CSV and generate exact match scores for Prompt B | |
| with open('MT0_xxl_results/result_vi_80p', 'r') as csvfile: | |
| reader = csv.DictReader(csvfile) | |
| scores_b = [1 if process_sentence(row['pred_label']) == process_sentence(row['label']) else 0 for row in reader] | |
| # Count the number of 1s in each list | |
| count_a = scores_a.count(1) | |
| count_b = scores_b.count(1) | |
| # Print the counts | |
| print(f"Number of exact matches for Prompt A: {count_a}") | |
| print(f"Number of exact matches for Prompt B: {count_b}") | |
| # Conduct Wilcoxon Signed Rank test | |
| w_stat, p_val = wilcoxon(scores_a, scores_b) | |
| # Print the results | |
| print(f"Wilcoxon Signed Rank statistic: {w_stat}") | |
| print(f"P-value: {p_val}") | |
| if p_val < 0.05: | |
| print("The difference in score distributions between the prompts is statistically significant.") | |
| else: | |
| print("The difference in score distributions between the prompts is not statistically significant.") | |