5 changes: 3 additions & 2 deletions egs/rm/s5/RESULTS
@@ -230,8 +230,9 @@ for x in exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_*; do grep WER $x/
%WER 7.36 [ 923 / 12533, 85 ins, 148 del, 690 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch4/wer_13

### chain results ###
# current best chain result with TDNN (check local/chain/run_tdnn_5f.sh)
%WER 2.94 [ 369 / 12533, 51 ins, 71 del, 247 sub ] exp/chain/tdnn_5f/decode/wer_3_0.5
# current best chain result with TDNN (check local/chain/run_tdnn_5n.sh)
%WER 2.86 [ 358 / 12533, 46 ins, 61 del, 251 sub ] exp/chain/tdnn_5g/decode/wer_5_0.0
%WER 2.71 [ 340 / 12533, 58 ins, 59 del, 223 sub ] exp/chain/tdnn_5n/decode/wer_4_0.0
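# A sketch for collecting the chain numbers above (assuming the standard
# scoring layout): for x in exp/chain/tdnn_5*/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done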

### nnet1 results ###

155 changes: 155 additions & 0 deletions egs/rm/s5/local/chain/run_tdnn_5g.sh
@@ -0,0 +1,155 @@
#!/bin/bash

# This is modified from run_tdnn_5f.sh, to use the old topology, as a baseline
# to test the modified transition-model code (by which we hope to be able to
# create more compact decoding graphs for chain models).
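# (One rough way to check the effect, once both this baseline and the
# modified-topology run have finished, is to compare graph sizes, e.g.:
#   du -sh exp/chain/tdnn_5g/graph exp/chain/tdnn_5n/graph )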

set -e

# configs for 'chain'
stage=0
train_stage=-10
get_egs_stage=-10
dir=exp/chain/tdnn_5g

# training options
num_epochs=12
initial_effective_lrate=0.005
final_effective_lrate=0.0005
leftmost_questions_truncate=-1
max_param_change=2.0
final_layer_normalize_target=0.5
num_jobs_initial=2
num_jobs_final=4
minibatch_size=128
frames_per_eg=150
remove_egs=false
common_egs_dir=  # set this to reuse previously dumped egs; otherwise egs are dumped fresh.

# End configuration section.
echo "$0 $@" # Print the command line for logging

. cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi
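# (For example, assuming a standard checkout: cd src && ./configure --use-cuda=yes && make)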

# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet2 setup, and you can skip them by setting "--stage 4" if you have already
# run those things.
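# For example, on a re-run:
#   local/chain/run_tdnn_5g.sh --stage 4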

ali_dir=exp/tri3b_ali
treedir=exp/chain/tri4_5g_tree
lang=data/lang_chain_5g

local/online/run_nnet2_common.sh --stage $stage || exit 1;

if [ $stage -le 4 ]; then
# Get the alignments as lattices (gives the chain training more freedom).
# use the same num-jobs as the alignments
nj=$(cat exp/tri3b_ali/num_jobs) || exit 1;
steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/train \
data/lang exp/tri3b exp/tri3b_lats
rm exp/tri3b_lats/fsts.*.gz # save space
fi

if [ $stage -le 5 ]; then
# Create a version of the lang/ directory that has one state per phone in the
# topo file. [Note: it really has two states; the first occurs exactly once,
# and the second has zero or more repeats.]
rm -rf $lang
cp -r data/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that we may later have to tune this
# topology.
steps/nnet3/chain/gen_topo_orig.py $nonsilphonelist $silphonelist >$lang/topo
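# For reference, the per-phone entry this produces looks like (see
# steps/nnet3/chain/gen_topo_orig.py):
#   <State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>
#   <State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>
#   <State> 2 </State>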
fi

if [ $stage -le 6 ]; then
# Build a tree using our new topology.
steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
--leftmost-questions-truncate $leftmost_questions_truncate \
--cmd "$train_cmd" 1200 data/train $lang $ali_dir $treedir
fi

if [ $stage -le 7 ]; then
mkdir -p $dir

echo "$0: creating neural net configs";

steps/nnet3/tdnn/make_configs.py \
--self-repair-scale-nonlinearity 0.00001 \
--feat-dir data/train \
--ivector-dir exp/nnet2_online/ivectors \
--tree-dir $treedir \
--relu-dim 450 \
--splice-indexes "-1,0,1 -2,-1,0,1 -3,0,3 -6,-3,0 0" \
--use-presoftmax-prior-scale false \
--xent-regularize 0.1 \
--xent-separate-forward-affine true \
--include-log-softmax false \
--final-layer-normalize-target 1.0 \
$dir/configs || exit 1;
fi

if [ $stage -le 8 ]; then
steps/nnet3/chain/train.py --stage $train_stage \
--cmd "$decode_cmd" \
--feat.online-ivector-dir exp/nnet2_online/ivectors \
--feat.cmvn-opts "--norm-means=false --norm-vars=false" \
--chain.xent-regularize 0.1 \
--chain.leaky-hmm-coefficient 0.1 \
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
--chain.lm-opts="--num-extra-lm-states=200" \
--egs.dir "$common_egs_dir" \
--egs.stage $get_egs_stage \
--egs.opts "--frames-overlap-per-eg 0" \
--egs.chunk-width $frames_per_eg \
--trainer.num-chunk-per-minibatch $minibatch_size \
--trainer.frames-per-iter 1000000 \
--trainer.num-epochs $num_epochs \
--trainer.optimization.num-jobs-initial $num_jobs_initial \
--trainer.optimization.num-jobs-final $num_jobs_final \
--trainer.optimization.initial-effective-lrate $initial_effective_lrate \
--trainer.optimization.final-effective-lrate $final_effective_lrate \
--trainer.max-param-change $max_param_change \
--cleanup.remove-egs $remove_egs \
--feat-dir data/train \
--tree-dir $treedir \
--lat-dir exp/tri3b_lats \
--dir $dir
fi

if [ $stage -le 9 ]; then
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 4 \
data/test exp/nnet2_online/extractor exp/nnet2_online/ivectors_test || exit 1;
fi

if [ $stage -le 10 ]; then
# Note: it might appear that this $lang directory is mismatched, and it is as
# far as the 'topo' is concerned, but this script doesn't read the 'topo' from
# the lang directory.
utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--scoring-opts "--min-lmwt 1" \
--nj 20 --cmd "$decode_cmd" \
--online-ivector-dir exp/nnet2_online/ivectors_test \
$dir/graph data/test $dir/decode || exit 1;
fi

if [ $stage -le 11 ]; then
utils/mkgraph.sh --self-loop-scale 1.0 data/lang_ug $dir $dir/graph_ug
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--nj 20 --cmd "$decode_cmd" \
--online-ivector-dir exp/nnet2_online/ivectors_test \
$dir/graph_ug data/test $dir/decode_ug || exit 1;
fi
wait;
exit 0;
85 changes: 55 additions & 30 deletions egs/rm/s5/local/chain/run_tdnn_5f.sh → egs/rm/s5/local/chain/run_tdnn_5n.sh
100644 → 100755
@@ -1,14 +1,17 @@
#!/bin/bash

# this script is a modified version of swbd/run_tdnn_5f.sh
# this script is a modified version of run_tdnn_5g.sh. It uses
# the new transition model and the python version of training scripts.



set -e

# configs for 'chain'
stage=0
train_stage=-10
get_egs_stage=-10
dir=exp/chain/tdnn_5f
dir=exp/chain/tdnn_5n

# training options
num_epochs=12
@@ -43,13 +46,13 @@ fi
# run those things.

ali_dir=exp/tri3b_ali
treedir=exp/chain/tri4_2y_tree
lang=data/lang_chain_2y
treedir=exp/chain/tri4_5n_tree
lang=data/lang_chain_5n

local/online/run_nnet2_common.sh --stage $stage || exit 1;

if [ $stage -le 4 ]; then
# Get the alignments as lattices (gives the CTC training more freedom).
# Get the alignments as lattices (gives the chain training more freedom).
# use the same num-jobs as the alignments
nj=$(cat exp/tri3b_ali/num_jobs) || exit 1;
steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/train \
@@ -78,51 +81,73 @@ if [ $stage -le 6 ]; then
fi

if [ $stage -le 7 ]; then
steps/nnet3/chain/train_tdnn.sh --stage $train_stage \
mkdir -p $dir

echo "$0: creating neural net configs";

steps/nnet3/tdnn/make_configs.py \
--self-repair-scale-nonlinearity 0.00001 \
--feat-dir data/train \
--ivector-dir exp/nnet2_online/ivectors \
--tree-dir $treedir \
--relu-dim 450 \
--splice-indexes "-1,0,1 -2,-1,0,1 -3,0,3 -6,-3,0 0" \
--use-presoftmax-prior-scale false \
--xent-regularize 0.1 \
--leaky-hmm-coefficient 0.1 \
--l2-regularize 0.00005 \
--jesus-opts "--jesus-forward-input-dim 200 --jesus-forward-output-dim 500 --jesus-hidden-dim 2000 --jesus-stddev-scale 0.2 --final-layer-learning-rate-factor 0.25" \
--splice-indexes "-1,0,1 -2,-1,0,1 -3,0,3 -6,-3,0" \
--apply-deriv-weights false \
--frames-per-iter 1000000 \
--lm-opts "--num-extra-lm-states=200" \
--get-egs-stage $get_egs_stage \
--minibatch-size $minibatch_size \
--egs-opts "--frames-overlap-per-eg 0" \
--frames-per-eg $frames_per_eg \
--num-epochs $num_epochs --num-jobs-initial $num_jobs_initial --num-jobs-final $num_jobs_final \
--feat-type raw \
--online-ivector-dir exp/nnet2_online/ivectors \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--initial-effective-lrate $initial_effective_lrate --final-effective-lrate $final_effective_lrate \
--max-param-change $max_param_change \
--cmd "$decode_cmd" \
--remove-egs $remove_egs \
data/train $treedir exp/tri3b_lats $dir || exit 1;
--xent-separate-forward-affine true \
--include-log-softmax false \
--final-layer-normalize-target 1.0 \
$dir/configs || exit 1;
fi

if [ $stage -le 8 ]; then
steps/nnet3/chain/train.py --stage $train_stage \
--cmd "$decode_cmd" \
--feat.online-ivector-dir exp/nnet2_online/ivectors \
--feat.cmvn-opts "--norm-means=false --norm-vars=false" \
--chain.xent-regularize 0.1 \
--chain.leaky-hmm-coefficient 0.1 \
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
--chain.lm-opts="--num-extra-lm-states=200" \
--egs.dir "$common_egs_dir" \
--egs.stage $get_egs_stage \
--egs.opts "--frames-overlap-per-eg 0" \
--egs.chunk-width $frames_per_eg \
--trainer.num-chunk-per-minibatch $minibatch_size \
--trainer.frames-per-iter 1000000 \
--trainer.num-epochs $num_epochs \
--trainer.optimization.num-jobs-initial $num_jobs_initial \
--trainer.optimization.num-jobs-final $num_jobs_final \
--trainer.optimization.initial-effective-lrate $initial_effective_lrate \
--trainer.optimization.final-effective-lrate $final_effective_lrate \
--trainer.max-param-change $max_param_change \
--cleanup.remove-egs $remove_egs \
--feat-dir data/train \
--tree-dir $treedir \
--lat-dir exp/tri3b_lats \
--dir $dir
fi

if [ $stage -le 9 ]; then
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 4 \
data/test exp/nnet2_online/extractor exp/nnet2_online/ivectors_test || exit 1;
fi

if [ $stage -le 9 ]; then
if [ $stage -le 10 ]; then
# Note: it might appear that this $lang directory is mismatched, and it is as
# far as the 'topo' is concerned, but this script doesn't read the 'topo' from
# the lang directory.
utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--extra-left-context 20 --scoring-opts "--min-lmwt 1" \
--scoring-opts "--min-lmwt 1" \
--nj 20 --cmd "$decode_cmd" \
--online-ivector-dir exp/nnet2_online/ivectors_test \
$dir/graph data/test $dir/decode || exit 1;
fi

if [ $stage -le 10 ]; then
if [ $stage -le 11 ]; then
utils/mkgraph.sh --self-loop-scale 1.0 data/lang_ug $dir $dir/graph_ug
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--extra-left-context 20 \
--nj 20 --cmd "$decode_cmd" \
--online-ivector-dir exp/nnet2_online/ivectors_test \
$dir/graph_ug data/test $dir/decode_ug || exit 1;
8 changes: 5 additions & 3 deletions egs/wsj/s5/steps/nnet3/chain/gen_topo.py
@@ -2,6 +2,9 @@

# Copyright 2012 Johns Hopkins University (author: Daniel Povey)

# This script was modified around 11.11.2016, when the code was extended to
# support having a different pdf-class on the self loop.

# Generate a topology file. This allows control of the number of states in the
# non-silence HMMs, and in the silence HMMs. This is a modified version of
# 'utils/gen_topo.pl' that generates a different type of topology, one that we
@@ -41,9 +44,8 @@
# We make the transition-probs 0.5 so they normalize, to keep the code happy.
# In fact, we always set the transition probability scale to 0.0 in the 'chain'
# code, so they are never used.
print("<State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
print("<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
print("<State> 2 </State>")
print("<State> 0 <ForwardPdfClass> 0 <SelfLoopPdfClass> 1 <Transition> 0 0.5 <Transition> 1 0.5 </State>")
print("<State> 1 </State>")
print("</TopologyEntry>")
print("</Topology>")

53 changes: 53 additions & 0 deletions egs/wsj/s5/steps/nnet3/chain/gen_topo_orig.py
@@ -0,0 +1,53 @@
#!/usr/bin/env python

# Copyright 2012 Johns Hopkins University (author: Daniel Povey)

# This file is ./gen_topo.py as it used to be (before we extended the
# transition-model code to support having a different self-loop pdf-class).
# It is included here for baseline and testing purposes.


# Generate a topology file. This allows control of the number of states in the
# non-silence HMMs, and in the silence HMMs. This is a modified version of
# 'utils/gen_topo.pl' that generates a different type of topology, one that we
# believe should be useful in the 'chain' model. Note: right now it doesn't
# have any real options, and it treats silence and nonsilence the same. The
# intention is that you write different versions of this script, or add options,
# if you experiment with it.

from __future__ import print_function
import argparse


parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo_orig.py "
"<colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n"
"e.g.: steps/nnet3/chain/gen_topo_orig.py 4:5:6:7:8:9:10 1:2:3\n",
epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for an example of usage.");
parser.add_argument("nonsilence_phones", type=str,
help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
parser.add_argument("silence_phones", type=str,
help="List of silence phones as integers, separated by colons, e.g. 1:2:3");

args = parser.parse_args()

silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
all_phones = silence_phones + nonsilence_phones

print("<Topology>")
print("<TopologyEntry>")
print("<ForPhones>")
print(" ".join([str(x) for x in all_phones]))
print("</ForPhones>")
# The next two lines may look like a bug, but they are as intended. State 0 has
# no self-loop, it happens exactly once. And it can go either to state 1 (with
# a self-loop) or to state 2, so we can have zero or more instances of state 1
# following state 0.
# We make the transition-probs 0.5 so they normalize, to keep the code happy.
# In fact, we always set the transition probability scale to 0.0 in the 'chain'
# code, so they are never used.
print("<State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
print("<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
print("<State> 2 </State>")
print("</TopologyEntry>")
print("</Topology>")
2 changes: 0 additions & 2 deletions src/bin/acc-tree-stats.cc
@@ -128,5 +128,3 @@ int main(int argc, char *argv[]) {
return -1;
}
}

