From db202bba52e634b3054bf6e581334b3d8bf0cd96 Mon Sep 17 00:00:00 2001
From: Enno Hermann <enno.hermann@gmail.com>
Date: Wed, 25 Jul 2018 11:41:46 +0200
Subject: [PATCH] [scripts] Fix to analyze_alignments/lats.sh (double-counting)

If a single phoneme is aligned to the whole utterance, it is counted as both
`begin` and `end`, but is added to the `all` total only once. This caused
`assert count >= 0` in analyze_phone_length_stats.py to fail. Now only
`begin` is counted in that case.
---
 egs/wsj/s5/steps/diagnostic/analyze_alignments.sh | 2 +-
 egs/wsj/s5/steps/diagnostic/analyze_lats.sh       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh b/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh
index b641cd18cbb..ff0a87ae295 100755
--- a/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh
+++ b/egs/wsj/s5/steps/diagnostic/analyze_alignments.sh
@@ -44,7 +44,7 @@ $cmd JOB=1:$num_jobs $dir/log/get_phone_alignments.JOB.log \
   set -o pipefail '&&' ali-to-phones --write-lengths=true "$model"  \
       "ark:gunzip -c $dir/ali.JOB.gz|" ark,t:- \| \
    sed -E 's/^[^ ]+ //' \| \
-   awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
+   awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; if (NF>1) print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
    sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1
 
 if ! $cmd $dir/log/analyze_alignments.log \
diff --git a/egs/wsj/s5/steps/diagnostic/analyze_lats.sh b/egs/wsj/s5/steps/diagnostic/analyze_lats.sh
index 98b33d9d09d..d580f516527 100755
--- a/egs/wsj/s5/steps/diagnostic/analyze_lats.sh
+++ b/egs/wsj/s5/steps/diagnostic/analyze_lats.sh
@@ -51,7 +51,7 @@ $cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \
 $cmd JOB=1:$num_jobs $dir/log/get_lattice_stats.JOB.log \
   ali-to-phones --write-lengths=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \
   sed -E 's/^[^ ]+ //' \| \
-  awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
+  awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; if (NF>1) print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
   sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1