Spaces:
Running
Running
José Ángel González
committed on
Commit
·
cb18316
1
Parent(s):
e315877
add examples
Browse files
- generation_evaluator.py +15 -14
- gradio_tst.py +12 -2
- requirements.txt +0 -1
generation_evaluator.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
import datasets
|
| 2 |
import evaluate
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import spacy
|
| 5 |
import torch
|
| 6 |
from alignscore import AlignScore
|
| 7 |
-
import nltk
|
| 8 |
|
| 9 |
_CITATION = """\
|
| 10 |
@inproceedings{lin-2004-rouge,
|
|
@@ -150,8 +150,8 @@ class GenerationEvaluator(evaluate.Metric):
|
|
| 150 |
spacy.cli.download("en_core_web_sm")
|
| 151 |
|
| 152 |
# Download punkt for AlignScore
|
| 153 |
-
nltk.download(
|
| 154 |
-
|
| 155 |
# Download AlignScore model and move to GPU if possible
|
| 156 |
model_path = dl_manager.download(ALIGNSCORE_ARGS["ckpt_path"])
|
| 157 |
ALIGNSCORE_ARGS["ckpt_path"] = model_path
|
|
@@ -160,29 +160,31 @@ class GenerationEvaluator(evaluate.Metric):
|
|
| 160 |
)
|
| 161 |
self.align_scorer = AlignScore(**ALIGNSCORE_ARGS)
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
def _compute(self, predictions, references):
|
| 164 |
# Compute ROUGE
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
rouge_results = rouge_score.compute(
|
| 168 |
predictions=predictions, references=references
|
| 169 |
)
|
| 170 |
|
| 171 |
# Compute BLEU
|
| 172 |
-
|
| 173 |
-
bleu_results = bleu_score.compute(
|
| 174 |
predictions=predictions, references=references
|
| 175 |
)
|
| 176 |
|
| 177 |
# Compute Exact Match
|
| 178 |
-
|
| 179 |
-
exact_match_results = exact_match_score.compute(
|
| 180 |
predictions=predictions, references=references
|
| 181 |
)
|
| 182 |
|
| 183 |
# Compute BERTScore
|
| 184 |
-
|
| 185 |
-
bert_score_results = bert_score.compute(
|
| 186 |
predictions=predictions, references=references, lang="en"
|
| 187 |
)
|
| 188 |
|
|
@@ -203,8 +205,7 @@ class GenerationEvaluator(evaluate.Metric):
|
|
| 203 |
)
|
| 204 |
|
| 205 |
# Compute CHRF
|
| 206 |
-
|
| 207 |
-
chrf_results = chrf.compute(
|
| 208 |
predictions=predictions, references=references
|
| 209 |
)
|
| 210 |
|
|
|
|
| 1 |
import datasets
|
| 2 |
import evaluate
|
| 3 |
+
import nltk
|
| 4 |
import numpy as np
|
| 5 |
import spacy
|
| 6 |
import torch
|
| 7 |
from alignscore import AlignScore
|
|
|
|
| 8 |
|
| 9 |
_CITATION = """\
|
| 10 |
@inproceedings{lin-2004-rouge,
|
|
|
|
| 150 |
spacy.cli.download("en_core_web_sm")
|
| 151 |
|
| 152 |
# Download punkt for AlignScore
|
| 153 |
+
nltk.download("punkt_tab")
|
| 154 |
+
|
| 155 |
# Download AlignScore model and move to GPU if possible
|
| 156 |
model_path = dl_manager.download(ALIGNSCORE_ARGS["ckpt_path"])
|
| 157 |
ALIGNSCORE_ARGS["ckpt_path"] = model_path
|
|
|
|
| 160 |
)
|
| 161 |
self.align_scorer = AlignScore(**ALIGNSCORE_ARGS)
|
| 162 |
|
| 163 |
+
# Prepare scorers
|
| 164 |
+
self.rouge_scorer = evaluate.load("rouge")
|
| 165 |
+
self.bleu_scorer = evaluate.load("bleu")
|
| 166 |
+
self.exact_match_scorer = evaluate.load("exact_match")
|
| 167 |
+
self.bert_scorer = evaluate.load("bertscore")
|
| 168 |
+
self.chrf_scorer = evaluate.load("chrf")
|
| 169 |
+
|
| 170 |
def _compute(self, predictions, references):
|
| 171 |
# Compute ROUGE
|
| 172 |
+
rouge_results = self.rouge_scorer.compute(
|
|
|
|
|
|
|
| 173 |
predictions=predictions, references=references
|
| 174 |
)
|
| 175 |
|
| 176 |
# Compute BLEU
|
| 177 |
+
bleu_results = self.bleu_scorer.compute(
|
|
|
|
| 178 |
predictions=predictions, references=references
|
| 179 |
)
|
| 180 |
|
| 181 |
# Compute Exact Match
|
| 182 |
+
exact_match_results = self.exact_match_scorer.compute(
|
|
|
|
| 183 |
predictions=predictions, references=references
|
| 184 |
)
|
| 185 |
|
| 186 |
# Compute BERTScore
|
| 187 |
+
bert_score_results = self.bert_scorer.compute(
|
|
|
|
| 188 |
predictions=predictions, references=references, lang="en"
|
| 189 |
)
|
| 190 |
|
|
|
|
| 205 |
)
|
| 206 |
|
| 207 |
# Compute CHRF
|
| 208 |
+
chrf_results = self.chrf_scorer.compute(
|
|
|
|
| 209 |
predictions=predictions, references=references
|
| 210 |
)
|
| 211 |
|
gradio_tst.py
CHANGED
|
@@ -117,6 +117,15 @@ def launch_gradio_widget2(metric):
|
|
| 117 |
def compute(data):
|
| 118 |
return metric.compute(**parse_gradio_data(data, gradio_input_types))
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
iface = gr.Interface(
|
| 121 |
fn=compute,
|
| 122 |
inputs=gr.Dataframe(
|
|
@@ -132,8 +141,9 @@ def launch_gradio_widget2(metric):
|
|
| 132 |
),
|
| 133 |
title=f"Metric: {metric.name}",
|
| 134 |
article=parse_readme(local_path / "README.md"),
|
| 135 |
-
|
| 136 |
-
|
|
|
|
| 137 |
)
|
| 138 |
|
| 139 |
iface.launch(share=True)
|
|
|
|
| 117 |
def compute(data):
|
| 118 |
return metric.compute(**parse_gradio_data(data, gradio_input_types))
|
| 119 |
|
| 120 |
+
test_cases = [
|
| 121 |
+
{
|
| 122 |
+
"predictions": [
|
| 123 |
+
"You are so good",
|
| 124 |
+
"Madrid is the capital of Spain",
|
| 125 |
+
],
|
| 126 |
+
"references": ["You are so bad", "Paris is the capital of France"],
|
| 127 |
+
}
|
| 128 |
+
]
|
| 129 |
iface = gr.Interface(
|
| 130 |
fn=compute,
|
| 131 |
inputs=gr.Dataframe(
|
|
|
|
| 141 |
),
|
| 142 |
title=f"Metric: {metric.name}",
|
| 143 |
article=parse_readme(local_path / "README.md"),
|
| 144 |
+
examples=[
|
| 145 |
+
parse_test_cases(test_cases, feature_names, gradio_input_types)
|
| 146 |
+
],
|
| 147 |
)
|
| 148 |
|
| 149 |
iface.launch(share=True)
|
requirements.txt
CHANGED
|
@@ -5,7 +5,6 @@ gradio
|
|
| 5 |
bert_score
|
| 6 |
rouge_score
|
| 7 |
numpy
|
| 8 |
-
git+https://github.com/huggingface/evaluate@a4bdc10c48a450b978d91389a48dbb5297835c7d
|
| 9 |
sacrebleu
|
| 10 |
git+https://github.com/yuh-zha/AlignScore.git
|
| 11 |
spacy
|
|
|
|
| 5 |
bert_score
|
| 6 |
rouge_score
|
| 7 |
numpy
|
|
|
|
| 8 |
sacrebleu
|
| 9 |
git+https://github.com/yuh-zha/AlignScore.git
|
| 10 |
spacy
|