diff --git a/egs/ami/s5/local/chain/run_blstm_ami_5.sh b/egs/ami/s5/local/chain/run_blstm_ami_5.sh index 5943494d8e1..53221a2bd53 100755 --- a/egs/ami/s5/local/chain/run_blstm_ami_5.sh +++ b/egs/ami/s5/local/chain/run_blstm_ami_5.sh @@ -128,7 +128,7 @@ if [ $stage -le 17 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh index 522498d847d..e54b5f43128 100755 --- a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh +++ b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh @@ -192,7 +192,7 @@ if [ $stage -le 12 ]; then --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 0.99 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --cleanup.remove-egs $remove_egs \ --feat-dir data/train_rvb_min${min_seg_len}_hires \ --tree-dir $treedir \ diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh index c11420e5cfd..0ca6062e9c8 100755 --- a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh +++ b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh @@ -187,7 +187,7 @@ if [ $stage -le 12 ]; then --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 0.99 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --cleanup.remove-egs $remove_egs \ --feat-dir data/train_rvb_min${min_seg_len}_hires \ --tree-dir $treedir \ diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh index a48e7ed55af..d9b11f9fb21 100644 --- a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh +++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh @@ -127,7 +127,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/multi_en/s5/local/chain/run_blstm_6h.sh b/egs/multi_en/s5/local/chain/run_blstm_6h.sh index 5a68947282a..df9b8002d0c 100644 --- a/egs/multi_en/s5/local/chain/run_blstm_6h.sh +++ b/egs/multi_en/s5/local/chain/run_blstm_6h.sh @@ -134,7 +134,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh index 9ab72b40ac2..a1be44cdbbf 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh @@ -154,7 +154,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh index 6e1712c5187..a4333e40b30 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh @@ -160,7 +160,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh index a1131f88de8..34dd378a7fe 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh @@ -185,7 +185,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh index 3155e21b618..ac22e858aea 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh @@ -159,7 +159,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh index f1a42cc175c..db0a0fe7b1a 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh @@ -159,7 +159,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh index 25dc1b2f1da..90afd1fb4cd 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh @@ -181,7 +181,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh index a678fe22044..aa666e4c5ab 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh @@ -164,7 +164,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts="--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh index 45c2466c150..e32fdffb69d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -188,7 +188,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh index 28c9bb80eff..555afa467fa 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -184,7 +184,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 10 \ + --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index ac163e03161..15679fb4061 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -222,10 +222,12 @@ def GetArgs(): help="Number of sequences to be processed in parallel every minibatch" ) parser.add_argument("--trainer.deriv-truncate-margin", type=int, dest='deriv_truncate_margin', default = None, - help="If specified, it is the number of frames that the derivative will be backpropagated through the chunk boundaries, " - "e.g., During BLSTM model training if the chunk-width=150 and deriv-truncate-margin=5, then the derivative will be " - "backpropagated up to t=-5 and t=154 in the forward and backward LSTM sequence respectively; " - "otherwise, the derivative will be backpropagated to the end of the sequence.") + help="(Relevant only for recurrent models). If specified, gives the margin " + "(in input frames) around the 'required' part of each chunk that the " + "derivatives are backpropagated to. If unset, the derivatives are " + "backpropagated all the way to the boundaries of the input data. E.g. 8 is " + "a reasonable setting. Note: the 'required' part of the chunk is defined by " + "the model's {left,right}-context.") # General options parser.add_argument("--stage", type=int, default=-4, @@ -675,8 +677,8 @@ def Train(args, run_opts): min_deriv_time = None max_deriv_time = None if not args.deriv_truncate_margin is None: - min_deriv_time = -args.deriv_truncate_margin - max_deriv_time = args.chunk_width - 1 + args.deriv_truncate_margin + min_deriv_time = -args.deriv_truncate_margin - model_left_context + max_deriv_time = args.chunk_width - 1 + args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = {1} iterations".format(args.num_epochs, num_iters)) for iter in range(num_iters):