From db202bba52e634b3054bf6e581334b3d8bf0cd96 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 25 Jul 2018 11:41:46 +0200 Subject: [PATCH] [scripts] Fix to analyze_alignments/lats.sh (double-counting) If a single phoneme is aligned to the whole utterance, it is counted both as `begin` and `end`, but is added to the total only once. This caused `assert count >= 0` in analyze_phone_length_stats.py to fail. Now only the `begin` is counted in that case. --- egs/wsj/s5/steps/diagnostic/analyze_alignments.sh | 2 +- egs/wsj/s5/steps/diagnostic/analyze_lats.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh b/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh index b641cd18cbb..ff0a87ae295 100755 --- a/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh +++ b/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh @@ -44,7 +44,7 @@ $cmd JOB=1:$num_jobs $dir/log/get_phone_alignments.JOB.log \ set -o pipefail '&&' ali-to-phones --write-lengths=true "$model" \ "ark:gunzip -c $dir/ali.JOB.gz|" ark,t:- \| \ sed -E 's/^[^ ]+ //' \| \ - awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \ + awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; if (NF>1) print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \ sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1 if ! $cmd $dir/log/analyze_alignments.log \ diff --git a/egs/wsj/s5/steps/diagnostic/analyze_lats.sh b/egs/wsj/s5/steps/diagnostic/analyze_lats.sh index 98b33d9d09d..d580f516527 100755 --- a/egs/wsj/s5/steps/diagnostic/analyze_lats.sh +++ b/egs/wsj/s5/steps/diagnostic/analyze_lats.sh @@ -51,7 +51,7 @@ $cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \ $cmd JOB=1:$num_jobs $dir/log/get_lattice_stats.JOB.log \ ali-to-phones --write-lengths=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \ sed -E 's/^[^ ]+ //' \| \ - awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \ + awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; if (NF>1) print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \ sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1