Skip to content

Commit cc6a6e0

Browse files
committed
Eval of manual Translation
1 parent d2c0f91 commit cc6a6e0

File tree

4 files changed

+79
-3
lines changed

4 files changed

+79
-3
lines changed
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
# Preprocess the manual transcript of an IWSLT TED evaluation set.
# Arguments:
#   $1 (set)  - evaluation set identifier used in the /data/orig/eval paths
#   $2 (name) - directory name used under /data and /model
# Also reads $sl and $tl, which are not assigned in this script
# (presumably exported by the caller - TODO confirm).
2+
3+
4+
set=$1
5+
name=$2
6+
7+
# Make sure the output directories exist (only the eval one is written below).
mkdir -p /data/${name}/eval
8+
mkdir -p /data/${name}/valid
9+
10+
##TOKENIZE
11+
##SMARTCASE
12+
##BPE
13+
14+
# Keep only the "<seg id" lines of the source-side XML, strip every tag,
# then tokenize, truecase and apply BPE. The result is written to
# /data/${name}/eval/manualTranscript.$set.s
cat /data/orig/eval/$set/IWSLT.$set/IWSLT.TED.$set.$sl-$tl.$sl.xml | \
15+
grep "<seg id" | sed -e "s/<[^>]*>//g" | \
16+
perl /opt/mosesdecoder/scripts/tokenizer/tokenizer.perl -l ${sl} | \
17+
/opt/mosesdecoder/scripts/recaser/truecase.perl --model /model/${name}/truecase-model.s | \
18+
/opt/subword-nmt/apply_bpe.py -c /model/${name}/codec --vocabulary /model/${name}/voc.s --vocabulary-threshold 50 \
19+
> /data/${name}/eval/manualTranscript.$set.s
20+
21+
+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/bin/bash
# Score the translation of a manual transcript against the IWSLT reference.
# Arguments:
#   $1 (set)    - evaluation set identifier used in the /data/orig/eval paths
#   $2 (output) - directory under /data whose eval/manualTranscript.$set.t
#                 file holds the hypotheses
# Also reads $sl, $tl and $systemName, which are not assigned in this script
# (presumably exported by the caller - TODO confirm).
2+
3+
set=$1
4+
output=$2
5+
6+
7+
# Source-side and target-side (reference) XML files of the evaluation set.
source=/data/orig/eval/${set}/IWSLT.${set}/IWSLT.TED.${set}.${sl}-${tl}.${sl}.xml
8+
reference=/data/orig/eval/${set}/IWSLT.${set}/IWSLT.TED.${set}.${sl}-${tl}.${tl}.xml
9+
10+
11+
12+
# Working directory for intermediate files and per-system result directory.
mkdir -p /tmp/eval.manual
13+
mkdir -p /results/$systemName/$set/
14+
15+
16+
# Plain-text reference: keep the "<seg id" lines and strip every XML tag.
grep "<seg id" $reference | sed -e "s/<[^>]*>//g" > /tmp/eval.manual/$set.reference
17+
18+
# Undo BPE splitting ("@@ " joiners and a trailing "@@"), undo the tokenizer's
# entity escaping, then upper-case the first character of every line.
# Notes on the entity handling:
#   - &lt; maps to "<" and &gt; maps to ">".
#   - &amp; is unescaped last, so an escaped "&amp;lt;" ends up as the literal
#     text "&lt;" rather than being unescaped twice.
#   - The replacement is written as \& because a bare & in a sed replacement
#     stands for the whole match (which would make the substitution a no-op).
sed -e 's/@@ //g' -e 's/@@$//g' -e "s/&apos;/'/g" -e 's/&#124;/|/g' -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&quot;/"/g' -e 's/&#91;/[/g' -e 's/&#93;/]/g' -e 's/&amp;/\&/g' "/data/$output/eval/manualTranscript.$set.t" | perl -nle 'print ucfirst' > "/tmp/eval.manual/$set.hyp"
19+
20+
# Replace lines that are empty or whitespace-only with the placeholder _EMPTY_;
# this copy of the hypotheses is the one passed to CharacTER.py further down.
sed -e "s/^\s*$/_EMPTY_/g" /tmp/eval.manual/$set.hyp > /tmp/eval.manual/$set.no-empty.hyp
21+
# Wrap the plain-text hypotheses into the XML file that mteval-v14.pl (-t) and
# tercom (-h) read below. The target language and the source XML template are
# taken from $tl and $source, so the wrapped file matches the set being
# evaluated instead of always using the dev2010 en-de source file.
perl /opt/SLT.KIT/scripts/evaluate/wrap-xml.perl "$tl" "$source" "$systemName" < "/tmp/eval.manual/$set.hyp" > "/tmp/eval.manual/$set.xml"
22+
23+
24+
25+
# BLEU via mteval-v14.pl: once with -c (stored as case-sensitive) and once
# without (stored as case-insensitive).
/opt/mosesdecoder/scripts/generic/mteval-v14.pl -c -s $source -r $reference -t /tmp/eval.manual/$set.xml > /results/$systemName/$set/manualTranscript.BLEU.case-sensitive
26+
/opt/mosesdecoder/scripts/generic/mteval-v14.pl -s $source -r $reference -t /tmp/eval.manual/$set.xml > /results/$systemName/$set/manualTranscript.BLEU.case-insensitive
27+
28+
# TER via tercom: once with -s (stored as case-sensitive) and once without.
java -Dfile.encoding=UTF8 -jar /opt/tercom-0.7.25/tercom.7.25.jar -N -s -r $reference -h /tmp/eval.manual/$set.xml > /results/$systemName/$set/manualTranscript.TER.case-sensitive
29+
java -Dfile.encoding=UTF8 -jar /opt/tercom-0.7.25/tercom.7.25.jar -N -r $reference -h /tmp/eval.manual/$set.xml > /results/$systemName/$set/manualTranscript.TER.case-insensitive
30+
31+
# BEER and CharacTER work on the plain-text files; CharacTER gets the
# hypothesis copy in which blank lines were replaced by _EMPTY_.
/opt/beer_2.0/beer -s /tmp/eval.manual/$set.hyp -r /tmp/eval.manual/$set.reference > /results/$systemName/$set/manualTranscript.BEER.case-sensitive
32+
/opt/CharacTER/CharacTER.py -r /tmp/eval.manual/$set.reference -o /tmp/eval.manual/$set.no-empty.hyp > /results/$systemName/$set/manualTranscript.CharacTER.case-sensitive
33+
34+
35+
# Pull one number out of each result file and scale it by 100:
#   BLEU      - 8th field of the first line containing "BLEU"
#   TER       - 3rd field of the line(s) containing "TER", two decimals
#   BEER      - 3rd field of the BEER output, two decimals
#   CharacTER - 1st field of the CharacTER output, two decimals
BLEU=`grep BLEU /results/$systemName/$set/manualTranscript.BLEU.case-sensitive | head -n 1 | awk '{print $8*100}'`
36+
ciBLEU=`grep BLEU /results/$systemName/$set/manualTranscript.BLEU.case-insensitive | head -n 1 | awk '{print $8*100}'`
37+
TER=`grep TER /results/$systemName/$set/manualTranscript.TER.case-sensitive | awk '{printf("%.2f\n",$3*100)}'`
38+
ciTER=`grep TER /results/$systemName/$set/manualTranscript.TER.case-insensitive | awk '{printf("%.2f\n",$3*100)}'`
39+
beer=`awk '{printf("%.2f\n",$3*100)}' /results/$systemName/$set/manualTranscript.BEER.case-sensitive`
40+
character=`awk '{printf("%.2f\n",$1*100)}' /results/$systemName/$set/manualTranscript.CharacTER.case-sensitive`
41+
42+
# Append (>>) one table row for the manual transcript to the existing summary.
echo "| $set (manual Transcript) | $BLEU | $TER | $beer | $character | $ciBLEU | $ciTER |" >> /results/$systemName/$set/Summary.md

scripts/evaluate/Eval.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,15 @@ java -Dfile.encoding=UTF8 -jar /opt/tercom-0.7.25/tercom.7.25.jar -N -s -r $refe
3535
java -Dfile.encoding=UTF8 -jar /opt/tercom-0.7.25/tercom.7.25.jar -N -r $reference -h /tmp/eval/$set.no-case.xml > /results/$systemName/$set/TER.case-insensitive
3636

3737
/opt/beer_2.0/beer -s /tmp/eval/$set.hyp -r /tmp/eval/$set.reference > /results/$systemName/$set/BEER.case-sensitive
38-
/opt/CharacTER/CharacTER.py -r /tmp/eval/$set.reference -o /tmp/eval/$set.no-empty.hyp > /results/$systemName/$set/CharacTER.case-senstive
38+
/opt/CharacTER/CharacTER.py -r /tmp/eval/$set.reference -o /tmp/eval/$set.no-empty.hyp > /results/$systemName/$set/CharacTER.case-sensitive
3939

4040

4141
BLEU=`grep BLEU /results/$systemName/$set/BLEU.case-sensitive | head -n 1 | awk '{print $8*100}'`
4242
ciBLEU=`grep BLEU /results/$systemName/$set/BLEU.case-insensitive | head -n 1 | awk '{print $8*100}'`
4343
TER=`grep TER /results/$systemName/$set/TER.case-sensitive | awk '{printf("%.2f\n",$3*100)}'`
4444
ciTER=`grep TER /results/$systemName/$set/TER.case-insensitive | awk '{printf("%.2f\n",$3*100)}'`
4545
beer=`awk '{printf("%.2f\n",$3*100)}' /results/$systemName/$set/BEER.case-sensitive`
46-
character=`awk '{printf("%.1f\n",$3*100)}' /results/$systemName/$set/BEER.case-sensitive`
46+
character=`awk '{printf("%.2f\n",$1*100)}' /results/$systemName/$set/CharacTER.case-sensitive`
4747

4848
echo "Results for $systemName" > /results/$systemName/$set/Summary.md
4949
echo "=======================" >> /results/$systemName/$set/Summary.md

systems/smallTED/Translate.sh

+14-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,19 @@ fi
1717
#Translate
1818
/opt/SLT.KIT/scripts/openNMT-py/Translate.sh $set monoTransPrepro mt
1919

20-
2120
#Eval
2221
/opt/SLT.KIT/scripts/evaluate/Eval.sh dev2010 mt
22+
23+
#Prepro manual transcript
24+
/opt/SLT.KIT/scripts/defaultPreprocessor/Translate.sh $set prepro
25+
26+
#Translate manual transcript
27+
/opt/SLT.KIT/scripts/openNMT-py/Translate.sh manualTranscript.$set prepro mt
28+
29+
#Eval manual transcript
30+
/opt/SLT.KIT/scripts/evaluate/Eval.manualTranscript.sh dev2010 mt
31+
32+
33+
34+
35+

0 commit comments

Comments
 (0)