-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval_utils.py
71 lines (52 loc) · 2.46 KB
/
eval_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
def compute_accuracy(reference_strings, predicted_strings):
    """Return the exact-match accuracy of the predictions, as a percentage.

    Args:
        reference_strings: Sequence of expected values.
        predicted_strings: Sequence of predicted values, aligned by index
            with ``reference_strings``.

    Returns:
        ``100 * matches / total`` as a float, where a match is a position at
        which the two elements compare equal with ``==``. Returns ``0.0``
        when both sequences are empty.

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    total = len(reference_strings)
    if total != len(predicted_strings):
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is kept so existing
        # callers observe the same error.
        raise AssertionError(
            "reference_strings and predicted_strings differ in length "
            "({} != {})".format(total, len(predicted_strings))
        )
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    matches = sum(
        1
        for expected, predicted in zip(reference_strings, predicted_strings)
        if expected == predicted
    )
    return 100 * matches / float(total)
def compute_bleu(references, hypotheses):
    """Return the mean smoothed sentence-level BLEU score, as a percentage.

    Each hypothesis is scored against its own references with NLTK's
    ``sentence_bleu`` using ``SmoothingFunction().method2``, and the
    per-sentence scores are averaged.

    Args:
        references: One entry per sentence; each entry is passed as the
            first argument of ``sentence_bleu`` (i.e. the references for
            that sentence).
        hypotheses: One hypothesis per sentence, aligned by position with
            ``references``; each is passed as the second argument of
            ``sentence_bleu``.

    Returns:
        ``100 * mean(per-sentence scores)`` as a float, or ``0.0`` when
        there are no (reference, hypothesis) pairs to score.
    """
    # NOTE: zip() stops at the shorter input, so surplus entries in the
    # longer one are ignored.
    pairs = list(zip(references, hypotheses))
    if not pairs:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    # Build the smoothing callable once instead of once per sentence.
    smoothing = SmoothingFunction().method2
    sentence_scores = [
        sentence_bleu(ref, hyp, smoothing_function=smoothing)
        for ref, hyp in pairs
    ]
    return 100 * sum(sentence_scores) / float(len(sentence_scores))
def compute_sentence_bleu(ref, hyp):
    """Score a single hypothesis against its references with smoothed BLEU.

    ``ref`` and ``hyp`` are forwarded unchanged as the first two arguments
    of NLTK's ``sentence_bleu``; smoothing is ``SmoothingFunction().method2``.
    The score is returned as produced by ``sentence_bleu`` (it is not scaled
    to a percentage here).
    """
    smoother = SmoothingFunction()
    return sentence_bleu(ref, hyp, smoothing_function=smoother.method2)
def compute_sentence_meteor_rouge(reference_list, sentences):
    """Compute per-sentence METEOR and ROUGE-L scores.

    Args:
        reference_list: For each sentence, an iterable of references, each
            reference being an iterable of string tokens. Indexed by the
            sentence position.
        sentences: One predicted sentence per entry, each an iterable of
            string tokens.

    Returns:
        A ``(meteor_scores, rouge_l_scores)`` tuple holding the per-sentence
        values returned by each scorer's ``compute_score``. When
        ``sentences`` is empty, two empty lists are returned without
        constructing the scorers.
    """
    if len(sentences) == 0:
        # Nothing to score: skip building the scorers altogether.
        return [], []

    predictions = {}
    references = {}
    for idx, tokens in enumerate(sentences):
        # Whitespace inside a predicted token is removed before joining, so
        # every token ends up as exactly one space-delimited word.
        predictions[idx] = [' '.join(''.join(tok.split()) for tok in tokens)]
        references[idx] = [
            ' '.join(ref_tokens) for ref_tokens in reference_list[idx]
        ]

    scorers = (
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
    )
    per_sentence = {}
    for scorer, metric_name in scorers:
        # compute_score returns a pair; only the second element (the
        # per-sentence scores) is used, the first is discarded.
        _, sentence_scores = scorer.compute_score(references, predictions)
        per_sentence[metric_name] = sentence_scores
    return per_sentence['METEOR'], per_sentence['ROUGE_L']
def compute_meteor_rouge(reference_list, sentences):
    """Return the average METEOR and ROUGE-L scores, as percentages.

    Args:
        reference_list: References for each sentence, forwarded unchanged to
            ``compute_sentence_meteor_rouge``.
        sentences: Predicted sentences, forwarded unchanged to
            ``compute_sentence_meteor_rouge``.

    Returns:
        A ``(METEOR, ROUGE_L)`` tuple, each being 100 times the mean of the
        corresponding per-sentence scores. Returns ``(0.0, 0.0)`` when
        ``sentences`` is empty.
    """
    if len(sentences) == 0:
        # No sentences means no per-sentence scores to average; return early
        # instead of dividing by zero below.
        return 0.0, 0.0
    meteors, rougels = compute_sentence_meteor_rouge(reference_list, sentences)
    meteor_pct = 100 * sum(meteors) / len(meteors)
    rouge_l_pct = 100 * sum(rougels) / len(rougels)
    return meteor_pct, rouge_l_pct
def test():
    """Run the BLEU, METEOR and ROUGE-L helpers on two toy sentences.

    The resulting scores are printed to standard output; nothing is returned.
    """
    # Each entry wraps a single tokenized reference for the matching prediction.
    gold = [
        [['Modified', 'the', 'code', 'to', 'work', 'with', 'Python', '3']],
        [['Add', 'the', 'following', 'in', 'System', 'Variables']],
    ]
    # The first prediction differs from its reference in the last token only.
    predicted = [
        ['Modified', 'the', 'code', 'to', 'work', 'with', 'Python', '2'],
        ['Add', 'the', 'following', 'in', 'System', 'Variables'],
    ]

    bleu = compute_bleu(gold, predicted)
    print('Predicted BLEU: {}'.format(bleu))

    meteor, rougel = compute_meteor_rouge(gold, predicted)
    print('Predicted Meteor: {}\nPredicted ROUGE_L: {}\n'.format(meteor, rougel))
# Run the smoke test only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test()