diff --git a/egs/ami/s5b/RESULTS_ihm b/egs/ami/s5b/RESULTS_ihm index 52b24e3a27c..0776bc05923 100644 --- a/egs/ami/s5b/RESULTS_ihm +++ b/egs/ami/s5b/RESULTS_ihm @@ -76,3 +76,8 @@ for d in exp/ihm/chain/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best %WER 21.5 | 13098 94486 | 81.8 11.0 7.2 3.3 21.5 54.6 | 0.090 | exp/ihm/chain_cleaned_rvb/tdnn_sp_rvb_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys %WER 21.9 | 12643 89985 | 80.8 12.3 6.9 2.7 21.9 52.5 | 0.183 | exp/ihm/chain_cleaned_rvb/tdnn_sp_rvb_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys + +# local/chain/tuning/run_tdnn_lstm_1i.sh --mic ihm --train-set train_cleaned --gmm tri3_cleaned +# cleanup + chain TDNN+LSTM model +%WER 20.8 | 13098 94489 | 82.0 10.0 8.0 2.8 20.8 53.2 | -0.096 | exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/decode_dev/ascore_11/dev_hires.ctm.filt.sys +%WER 20.7 | 12643 89980 | 81.7 11.5 6.8 2.5 20.7 51.8 | 0.015 | exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/decode_eval/ascore_11/eval_hires.ctm.filt.sys diff --git a/egs/ami/s5b/RESULTS_mdm b/egs/ami/s5b/RESULTS_mdm index de815c8dc3e..80eb152fc5d 100644 --- a/egs/ami/s5b/RESULTS_mdm +++ b/egs/ami/s5b/RESULTS_mdm @@ -79,8 +79,13 @@ # local/chain/multi_condition/run_tdnn.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned # cleanup + chain TDNN model, MDM original + IHM reverberated data, alignments from IHM data -# *** best system *** # for d in exp/mdm8/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done %WER 35.8 | 14512 94498 | 68.2 17.2 14.6 4.0 35.8 64.9 | 0.632 | exp/mdm8/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys %WER 39.1 | 13651 89967 | 64.3 18.4 17.3 3.3 39.1 65.2 | 0.607 | exp/mdm8/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys + +# local/chain/tuning/run_tdnn_lstm_1i.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +# cleanup + chain TDNN+LSTM model, MDM audio and alignments from IHM data +# *** best system *** +%WER 34.6 | 15116 94508 | 69.6 17.6 12.9 4.1 34.6 62.3 | 0.687 | exp/mdm8/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys +%WER 37.1 | 14343 90002 | 66.3 18.8 14.9 3.4 37.1 62.3 | 0.659 | exp/mdm8/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys diff --git a/egs/ami/s5b/RESULTS_sdm b/egs/ami/s5b/RESULTS_sdm index 7b1e56b5903..f0177a45078 100644 --- a/egs/ami/s5b/RESULTS_sdm +++ b/egs/ami/s5b/RESULTS_sdm @@ -88,4 +88,8 @@ %WER 42.7 | 14070 89982 | 60.9 21.0 18.0 3.6 42.7 64.5 | 0.571 | exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys - +# local/chain/tuning/run_tdnn_lstm_1i.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +# cleanup + chain TDNN model, SDM audio + alignments from ihm data. 
+# *** best system *** +%WER 37.6 | 15122 94495 | 66.1 18.7 15.1 3.7 37.6 63.2 | 0.646 | exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_dev/ascore_10/dev_hires_o4.ctm.filt.sys +%WER 40.9 | 13807 89961 | 62.4 20.0 17.6 3.3 40.9 65.7 | 0.612 | exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_eval/ascore_10/eval_hires_o4.ctm.filt.sys diff --git a/egs/ami/s5b/local/chain/compare_wer_general.sh b/egs/ami/s5b/local/chain/compare_wer_general.sh new file mode 100755 index 00000000000..225890daf5c --- /dev/null +++ b/egs/ami/s5b/local/chain/compare_wer_general.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +mic=$1; +shift; + +echo -n "System " +for x in $*; do printf "% 10s" $x; done +echo + +#for d in exp/sdm1/chain_cleaned/tdnn*/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done|grep eval_hires + + +echo -n "WER on dev " +for x in $*; do + wer=$(grep Sum exp/$mic/chain_cleaned/${x}/decode_dev*/*sc*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "WER on eval " +for x in $*; do + wer=$(grep Sum exp/$mic/chain_cleaned/${x}/decode_eval*/*sc*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "Final train prob " +for x in $*; do + prob=$(grep Overall exp/$mic/chain_cleaned/${x}/log/compute_prob_train.final.log | grep -v xent | awk '{print $8}') + printf "% 10s" $prob +done +echo + +echo -n "Final valid prob " +for x in $*; do + prob=$(grep Overall exp/$mic/chain_cleaned/${x}/log/compute_prob_valid.final.log | grep -v xent | awk '{print $8}') + printf "% 10s" $prob +done +echo + +echo -n "Final train prob (xent) " +for x in $*; do + prob=$(grep Overall exp/$mic/chain_cleaned/${x}/log/compute_prob_train.final.log | grep -w xent | awk '{print $8}') + printf "% 10s" $prob +done +echo + +echo -n "Final valid prob (xent) " +for x in $*; do + prob=$(grep Overall exp/$mic/chain_cleaned/${x}/log/compute_prob_valid.final.log | grep -w xent | awk '{print $8}') + printf "% 10s" $prob +done +echo diff --git a/egs/ami/s5b/local/chain/run_tdnn.sh b/egs/ami/s5b/local/chain/run_tdnn.sh deleted file mode 100755 index 8df62af8bad..00000000000 --- a/egs/ami/s5b/local/chain/run_tdnn.sh +++ /dev/null @@ -1,242 +0,0 @@ -#!/bin/bash - -# This is a chain-training script with TDNN neural networks. -# Please see RESULTS_* for examples of command lines invoking this script. - - -# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali true --mic sdm1 # rerunning with biphone -# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali false --mic sdm1 - -# local/chain/run_tdnn.sh --use-ihm-ali true --mic sdm1 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & - -# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --stage 12 & -# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & - -# local/chain/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned& - - -set -e -o pipefail - -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). -stage=0 -mic=ihm -nj=30 -min_seg_len=1.55 -use_ihm_ali=false -train_set=train_cleaned -gmm=tri3_cleaned # the gmm for the target data -ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). -num_threads_ubm=32 -nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned - -# The rest are configs specific to this script. Most of the parameters -# are just hardcoded at this level, in the commands below. 
-train_stage=-10 -tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. -tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. -common_egs_dir= # you can set this to use previously dumped egs. - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <data/lang_chain/topo - fi -fi - -if [ $stage -le 13 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 14 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --leftmost-questions-truncate -1 \ - --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir -fi - -if [ $stage -le 15 ]; then - mkdir -p $dir - - echo "$0: creating neural net configs"; - - steps/nnet3/tdnn/make_configs.py \ - --self-repair-scale-nonlinearity 0.00001 \ - --feat-dir data/$mic/${train_set}_sp_hires_comb \ - --ivector-dir $train_ivector_dir \ - --tree-dir $tree_dir \ - --relu-dim 450 \ - --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ - --use-presoftmax-prior-scale false \ - --xent-regularize 0.1 \ - --xent-separate-forward-affine true \ - --include-log-softmax false \ - --final-layer-normalize-target 1.0 \ - $dir/configs || exit 1; -fi - -if [ $stage -le 16 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage - fi - - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize 0.1 \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --trainer.num-chunk-per-minibatch 128 \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 4 \ - --trainer.optimization.num-jobs-initial 2 \ - --trainer.optimization.num-jobs-final 12 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir -fi - - -graph_dir=$dir/graph_${LM} -if [ $stage -le 17 ]; then - # Note: it might appear that this data/lang_chain directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. 
- utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - rm $dir/.error 2>/dev/null || true - for decode_set in dev eval; do - ( - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$decode_cmd" \ - --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --scoring-opts "--min-lmwt 5 " \ - $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; - ) || touch $dir/.error & - done - wait - if [ -f $dir/.error ]; then - echo "$0: something went wrong in decoding" - exit 1 - fi -fi -exit 0 \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/run_tdnn.sh b/egs/ami/s5b/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..61f8f499182 --- /dev/null +++ b/egs/ami/s5b/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1b.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..8df62af8bad --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,242 @@ +#!/bin/bash + +# This is a chain-training script with TDNN neural networks. +# Please see RESULTS_* for examples of command lines invoking this script. + + +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali true --mic sdm1 # rerunning with biphone +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali false --mic sdm1 + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic sdm1 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --stage 12 & +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned& + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. 
+ if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 15 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir data/$mic/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh new file mode 100755 index 00000000000..a262f8e1860 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh @@ -0,0 +1,270 @@ +#!/bin/bash + +# This is a chain-training script with TDNN neural networks. +# Please see RESULTS_* for examples of command lines invoking this script. 
+ + +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali true --mic sdm1 # rerunning with biphone +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali false --mic sdm1 + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic sdm1 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --stage 12 & +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned& + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1b #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..64cde69e7dd --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh @@ -0,0 +1,257 @@ +#!/bin/bash + +# same as 1b but with shorter minibatches + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1c #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..ba136e67521 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,288 @@ +#!/bin/bash + +# TDNN+LSTM architecture similar to swbd/tdnn_lstm_1b +# results on sdm1 with ihm ali +#System tdnn1b tdnn_lstm1a +#WER on dev 39.9 38.9 +#WER on eval 43.9 42.2 +#Final train prob -0.186387 -0.142585 +#Final valid prob -0.259997 -0.251197 +#Final train prob (xent) -2.4593 -1.73176 +#Final valid prob (xent) -2.70347 -2.26965 + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1a #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. 
./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..ed615a98e30 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,292 @@ +#!/bin/bash + +# same as 1a but the neural network has two more TDNN layers (0,3 0,3) +# above the lstm +# results on sdm1 with ihm ali + +#System 1a 1b +#WER on dev 38.9 39.6 +#WER on eval 42.2 42.9 +#Final train prob -0.142585 -0.152283 +#Final valid prob -0.251197 -0.253287 +#Final train prob (xent) -1.73176 -1.77542 +#Final valid prob (xent) -2.26965 -2.28851 + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1b #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn8 input=Append(0,3) dim=512 + relu-renorm-layer name=tdnn9 input=Append(0,3) dim=512 + + ## adding the layers for chain branch + output-layer name=output input=tdnn9 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=tdnn9 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh new file mode 100755 index 00000000000..ce719d6f2cb --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# same as 1a, but with more TDNN layers between each LSTM +# results on sdm1 with ihm ali +#System 1a 1c +#WER on dev 38.9 39.0 +#WER on eval 42.2 41.9 +#Final train prob -0.142585 -0.142951 +#Final valid prob -0.251197 -0.249901 +#Final train prob (xent) -1.73176 -1.71779 +#Final valid prob (xent) -2.26965 -2.22776 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. 
_cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1c #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for 
xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh new file mode 100755 index 00000000000..22967036cb2 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -0,0 +1,293 @@ +#!/bin/bash + +# same as 1c, but with more TDNN layers between each LSTM +# results on sdm1 with ihm ali + +#System tdnn_lstm1c_sp_bi_ihmali_ld5 tdnn_lstm1d_sp_bi_ihmali_ld5 +#WER on dev 39.0 39.1 +#WER on eval 41.9 42.0 +#Final train prob -0.142951 -0.150625 +#Final valid prob -0.249901 -0.248819 +#Final train prob (xent) -1.71779 -1.75401 +#Final valid prob (xent) -2.22776 -2.24072 + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1d #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn10 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn11 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..6e73457a772 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -0,0 +1,293 @@ +#!/bin/bash + +# same as 1c but with only right context for the TDNNs i.e., (0,3) in place +# of (-3,0,3) +# results on sdm1 with ihm ali + +#System tdnn_lstm1c_sp_bi_ihmali_ld5tdnn_lstm1e_sp_bi_ihmali_ld5 +#WER on dev 39.0 39.4 +#WER on eval 41.9 42.4 +#Final train prob -0.142951 -0.152498 +#Final valid prob -0.249901 -0.251393 +#Final train prob (xent) -1.71779 -1.77722 +#Final valid prob (xent) -2.22776 -2.26705 +# + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). 
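+# (The chunk_* and label_delay settings below are what distinguish these
+#  recurrent TDNN+LSTM setups from the plain TDNN recipe: chunk_left_context
+#  gives the LSTM layers left acoustic context when the egs are dumped, and
+#  label_delay=5 delays the outputs by 5 frames so each prediction can see a
+#  little future context -- this is the "_ld5" suffix in the experiment names.)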
+num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(0,3) dim=512 + relu-renorm-layer name=tdnn6 input=Append(0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn7 input=Append(0,3) dim=512 + relu-renorm-layer name=tdnn8 input=Append(0,3) dim=512 + relu-renorm-layer name=tdnn9 input=Append(0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay 
include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
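+# The --left-biphone option matches the left-biphone tree built in stage 14
+# (--context-width=2 --central-position=1), and --self-loop-scale 1.0 is the
+# standard setting for graphs used with 'chain' models.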
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh new file mode 100755 index 00000000000..3c4df056460 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -0,0 +1,292 @@ +#!/bin/bash + +# same as 1a but the neural network has two more TDNN layers (0,3 0,3) +# above the lstm +# results on sdm1 with ihm ali +# +#System tdnn_lstm1a_sp_bi_ihmali_ld5tdnn_lstm1f_sp_bi_ihmali_ld5 +#WER on dev 38.9 39.4 +#WER on eval 42.2 42.7 +#Final train prob -0.142585 -0.15514 +#Final valid prob -0.251197 -0.253257 +#Final train prob (xent) -1.73176 -1.80786 +#Final valid prob (xent) -2.26965 -2.29771 + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1f #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn8 input=Append(0,3) dim=512 + tanh-layer name=tdnn9 input=Append(0,3) dim=512 + + ## adding the layers for chain branch + output-layer name=output input=tdnn9 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=tdnn9 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh new file mode 100755 index 00000000000..cce5f2f5f3e --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -0,0 +1,293 @@ +#!/bin/bash + +# same as 1c but with smaller minibatch +# using smaller minibatches seems to be better in TDNN+LSTM archs. +# not much difference in other archs. +# results on sdm1 using ihm ali +#System tdnn_lstm1c_sp_bi_ihmali_ld5tdnn_lstm1g_sp_bi_ihmali_ld5 +#WER on dev 39.0 38.3 +#WER on eval 41.9 41.6 +#Final train prob -0.142951 -0.138017 +#Final valid prob -0.249901 -0.238659 +#Final train prob (xent) -1.71779 -1.66834 +#Final valid prob (xent) -2.22776 -2.17419 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). 
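+# (The smaller minibatch referred to above is --trainer.num-chunk-per-minibatch 64
+#  in the train.py call further below; the other tuning scripts in this patch use 128.)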
+num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1g #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay 
include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
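+# Note on the decode configuration in stage 18 below: extra_left_context=50 is
+# slightly more than the 40 frames of chunk_left_context used when dumping egs,
+# so the LSTMs get at least as much recurrent left context in decoding as in training.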
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh new file mode 100755 index 00000000000..c306849632a --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -0,0 +1,294 @@ +#!/bin/bash + +# same as 1c but with one more stack of TDNN and LSTM layers +# results on sdm1 using ihm ali +#System tdnn_lstm1c_sp_bi_ihmali_ld5 tdnn_lstm1h_sp_bi_ihmali_ld5 +#WER on dev 39.0 39.4 +#WER on eval 41.9 42.6 +#Final train prob -0.142951 -0.157634 +#Final valid prob -0.249901 -0.24945 +#Final train prob (xent) -1.71779 -1.7585 +#Final valid prob (xent) -2.22776 -2.2512 + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1h #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=512 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn10 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn11 input=Append(-3,0,3) dim=512 + relu-renorm-layer name=tdnn12 input=Append(-3,0,3) dim=512 + lstmp-layer name=lstm4 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm4 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh new file mode 100755 index 00000000000..29f64ae6620 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -0,0 +1,292 @@ +#!/bin/bash + +# same as 1g but with TDNN output dim 1024 instead of 512 +# (num-params 1g:21309812 1i: 43447156) +# results on sdm1 using ihm ali +#System tdnn_lstm1g_sp_bi_ihmali_ld5 tdnn_lstm1i_sp_bi_ihmali_ld5 +#WER on dev 38.3 37.6 +#WER on eval 41.6 40.9 +#Final train prob -0.138017 -0.114135 +#Final valid prob -0.238659 -0.245208 +#Final train prob (xent) -1.66834 -1.47648 +#Final valid prob (xent) -2.17419 -2.16365 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). 
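+# (The num-params figures quoted in the header can be checked after training with,
+#  e.g., "nnet3-info $dir/final.mdl | grep num-parameters".)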
+num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 
output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --chain.left-deriv-truncate 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh index 0a05babbfd3..b19ea6eafab 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh @@ -1,11 +1,23 @@ #!/bin/bash + +#System tdnn_7h tdnn_7j +#WER on train_dev(tg) 13.84 14.15 +#WER on train_dev(fg) 12.84 12.96 +#WER on eval2000(tg) 16.5 16.8 +#WER on eval2000(fg) 14.8 15.1 +#Final train prob -0.0889771 -0.0910883 +#Final valid prob -0.113102 -0.112464 +#Final train prob (xent) -1.2533 -1.31768 +#Final valid prob (xent) -1.36743 -1.41603 + + set -e # configs for 'chain' affix= -stage=13 -train_stage=4 +stage=12 +train_stage=-10 get_egs_stage=-10 speed_perturb=true dir=exp/chain/tdnn_7j # Note: _sp will get added to this if $speed_perturb == true.
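A minimal usage sketch (an assumption, not shown in this hunk: it presumes run_tdnn_7j.sh parses command-line overrides via utils/parse_options.sh, as the AMI tuning scripts above do). With the defaults restored to stage=12 and train_stage=-10 the script runs through all stages from scratch; an existing exp/chain/tdnn_7j_sp run can still be resumed by overriding them explicitly:

  # resume a partially completed run (hypothetical stage values)
  local/chain/tuning/run_tdnn_7j.sh --stage 13 --train-stage 4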