-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbleu.py
executable file
·60 lines (48 loc) · 2.55 KB
/
bleu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import argparse
import math
from collections import Counter
import numpy
import sys
# written by Adam Lopez
# Collect BLEU-relevant statistics for a single hypothesis/reference pair.
# Return value is a list of the form:
# (c, r, numerator1, denominator1, ... numerator4, denominator4)
# Summing the columns across calls to this function on an entire corpus will
# produce a vector of statistics that can be used to compute BLEU (below)
# Optionally compute lowercase BLEU stats
def bleu_stats(hypothesis, reference, lowercase):
    """Collect BLEU sufficient statistics for one hypothesis/reference pair.

    Args:
        hypothesis: Sequence of tokens (or a plain string, in which case the
            n-grams are over characters).
        reference: Sequence of tokens (or a plain string), same convention.
        lowercase: When truthy, n-grams are compared case-insensitively.

    Returns:
        A list of 10 ints:
        ``[c, r, match_1, total_1, match_2, total_2, ..., match_4, total_4]``
        where ``c``/``r`` are the hypothesis/reference lengths, ``match_n`` is
        the clipped number of hypothesis n-grams also found in the reference,
        and ``total_n`` is the number of n-grams in the hypothesis (0 when the
        hypothesis is shorter than ``n``). Summing these lists element-wise
        over a corpus gives the input expected by ``bleu``.
    """
    stats = [len(hypothesis), len(reference)]

    if lowercase:
        # Token lists have no .lower(); lowercase token by token. Plain
        # strings are still lowercased directly.
        hypothesis, reference = (
            seq.lower() if isinstance(seq, str) else [tok.lower() for tok in seq]
            for seq in (hypothesis, reference)
        )

    for n in range(1, 5):
        hyp_ngrams = Counter(
            tuple(hypothesis[i:i + n]) for i in range(len(hypothesis) + 1 - n)
        )
        ref_ngrams = Counter(
            tuple(reference[i:i + n]) for i in range(len(reference) + 1 - n)
        )
        # Counter intersection keeps the minimum count per n-gram, i.e. the
        # clipped match count used by BLEU.
        stats.append(sum((hyp_ngrams & ref_ngrams).values()))
        # Number of n-grams in the hypothesis; 0 if it is shorter than n.
        stats.append(max(len(hypothesis) + 1 - n, 0))
    return stats
# Compute BLEU from collected statistics obtained by call(s) to bleu_stats
def bleu(stats):
    """Compute corpus BLEU (0-100) from summed ``bleu_stats`` vectors.

    Args:
        stats: Sequence ``[c, r, match_1, total_1, ..., match_4, total_4]``
            (a list or a numpy array), typically the element-wise sum of
            ``bleu_stats`` results over a corpus.

    Returns:
        A tuple ``(score, ngram_precisions)``. ``score`` is 0 when any entry
        of ``stats`` is 0 (empty hypothesis/reference or no matches at some
        n-gram order); otherwise it is
        ``100 * BP * exp(mean(log p_n))`` with ``BP = exp(min(0, 1 - r/c))``.
        ``ngram_precisions`` is the list of 1..4-gram precisions, with 0.0
        for any order that has no hypothesis n-grams.
    """
    c, r = stats[:2]  # lengths of candidate, reference

    # Guard the division so an order with no hypothesis n-grams yields 0.0
    # instead of raising ZeroDivisionError.
    ngram_precisions = [
        float(matched) / total if total else 0.0
        for matched, total in zip(stats[2::2], stats[3::2])
    ]

    # Any zero statistic makes the geometric mean (or the length ratio)
    # undefined; BLEU is defined as 0 in that case.
    if any(value == 0 for value in stats):
        return 0, ngram_precisions

    avg_log_precision = sum(math.log(p) for p in ngram_precisions) / len(ngram_precisions)
    # Brevity penalty: 1 when c >= r, exp(1 - r/c) otherwise. Applied once.
    brevity_penalty = math.exp(min(0, 1 - float(r) / c))
    score = 100 * brevity_penalty * math.exp(avg_log_precision)
    return score, ngram_precisions
def _tokenize_hypothesis(line):
    """Split a hypothesis line into tokens, dropping literal <s>/</s> markers."""
    line = line.strip()
    # str.strip("<s>") would remove any of the characters '<', 's', '>' from
    # both ends (eating real word characters), so match the markers literally.
    if line.startswith("<s>"):
        line = line[len("<s>"):]
    if line.endswith("</s>"):
        line = line[:-len("</s>")]
    return line.split()


def _parse_bool(value):
    """Interpret an optional command-line value for --lowercase as a boolean."""
    return value.strip().lower() in ("1", "true", "t", "yes", "y", "on")


def main():
    """Read parallel hypothesis/reference files and print corpus BLEU."""
    # argparse reserves -h for --help by default, which conflicts with the
    # script's own -h/--hyp option; disable the automatic flag and expose
    # help as --help only.
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--help", action="help",
                        help="show this help message and exit")
    parser.add_argument("-h", "--hyp", required=True,
                        help="hypothesis file, one sentence per line")
    parser.add_argument("-r", "--ref", required=True,
                        help="reference file, one sentence per line")
    # Accept both a bare `-l` and the older `-l True` / `-l False` form.
    parser.add_argument("-l", "--lowercase", nargs="?", const=True,
                        default=False, type=_parse_bool,
                        help="compute case-insensitive BLEU")
    args = parser.parse_args()

    stats = numpy.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    with open(args.hyp, 'r', encoding='utf-8') as hyp_file, \
            open(args.ref, 'r', encoding='utf-8') as ref_file:
        # zip stops at the shorter file, as in the original script.
        for hyp_line, ref_line in zip(hyp_file, ref_file):
            hyp = _tokenize_hypothesis(hyp_line)
            ref = ref_line.strip().split()
            stats += numpy.array(bleu_stats(hyp, ref, args.lowercase))

    bleu_score, ngram_precisions = bleu(stats)
    print("BLEU: {0:0.2f}".format(bleu_score))
    print("N-gram Precisions: 1: {}, 2: {}, 3: {}, 4: {}.".format(*ngram_precisions))


if __name__ == '__main__':
    main()