diff --git a/egs/wsj/s5/steps/get_ctm.sh b/egs/wsj/s5/steps/get_ctm.sh index 4050945ab72..690e6cee4f2 100755 --- a/egs/wsj/s5/steps/get_ctm.sh +++ b/egs/wsj/s5/steps/get_ctm.sh @@ -77,6 +77,7 @@ if [ $stage -le 0 ]; then set -o pipefail '&&' mkdir -p $dir/score_LMWT/ '&&' \ lattice-1best --lm-scale=LMWT "ark:gunzip -c $lats|" ark:- \| \ lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-1best ark:- ark:- \| \ nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \ utils/int2sym.pl -f 5 $lang/words.txt \| \ $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1; diff --git a/egs/wsj/s5/steps/get_train_ctm.sh b/egs/wsj/s5/steps/get_train_ctm.sh index 10b29708d84..878e11e45ac 100755 --- a/egs/wsj/s5/steps/get_train_ctm.sh +++ b/egs/wsj/s5/steps/get_train_ctm.sh @@ -76,6 +76,7 @@ if [ $stage -le 0 ]; then "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text |" \ '' '' ark:- \| \ lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-1best ark:- ark:- \| \ nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \ utils/int2sym.pl -f 5 $lang/words.txt \| \ gzip -c '>' $dir/ctm.JOB.gz || exit 1 diff --git a/src/latbin/nbest-to-ctm.cc b/src/latbin/nbest-to-ctm.cc index e396f315ba1..89c47449bcf 100644 --- a/src/latbin/nbest-to-ctm.cc +++ b/src/latbin/nbest-to-ctm.cc @@ -32,7 +32,12 @@ int main(int argc, char *argv[]) { "and must be in CompactLattice form where the transition-ids on the arcs\n" "have been aligned with the word boundaries... typically the input will\n" "be a lattice that has been piped through lattice-1best and then\n" - "lattice-align-words. It outputs ctm format (with integers in place of words),\n" + "lattice-align-words. 
On the other hand, whenever we directly pipe\n" + "the output of lattice-align-words-lexicon into nbest-to-ctm,\n" + "we need to put the command `lattice-1best ark:- ark:-` between them,\n" + "because even for linear lattices, lattice-align-words-lexicon can\n" + "in certain cases produce non-linear outputs (due to ambiguity\n" + "in the lexicon). It outputs ctm format (with integers in place of words),\n" "assuming the frame length is 0.01 seconds by default (change this with the\n" "--frame-length option). Note: the output is in the form\n" " 1 \n" @@ -42,6 +47,9 @@ int main(int argc, char *argv[]) { "Usage: nbest-to-ctm [options] \n" "e.g.: lattice-1best --acoustic-weight=0.08333 ark:1.lats | \\\n" " lattice-align-words data/lang/phones/word_boundary.int exp/dir/final.mdl ark:- ark:- | \\\n" + " nbest-to-ctm ark:- 1.ctm\n" + "e.g.: lattice-align-words-lexicon data/lang/phones/align_lexicon.int exp/dir/final.mdl ark:1.lats ark:- | \\\n" + " lattice-1best ark:- ark:- | \\\n" " nbest-to-ctm ark:- 1.ctm\n"; ParseOptions po(usage);