Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions egs/ami/s5b/RESULTS_ihm
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@
# local/nnet3/run_blstm.sh --mic ihm
# nnet3 xent BLSTM with data cleaning
# for d in exp/ihm/nnet3_cleaned/lstm_bidirectional_sp/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
# Note: these results were obtained with ClipGradientComponent and may differ from results obtained with BackpropTruncationComponent
%WER 22.3 | 13098 94494 | 80.9 11.7 7.4 3.2 22.3 55.7 | -0.618 | exp/ihm/nnet3_cleaned/lstm_bidirectional_sp/decode_dev/ascore_10/dev_hires.ctm.filt.sys
%WER 22.5 | 12643 89962 | 80.2 12.7 7.1 2.7 22.5 53.4 | -0.476 | exp/ihm/nnet3_cleaned/lstm_bidirectional_sp/decode_eval/ascore_10/eval_hires.ctm.filt.sys
%WER 22.4 | 13098 94483 | 80.8 11.6 7.6 3.2 22.4 55.4 | -0.620 | exp/ihm/nnet3_cleaned/lstm_bidirectional_sp/decode_dev/ascore_10/dev_hires.ctm.filt.sys
%WER 22.4 | 12643 89977 | 80.3 12.5 7.2 2.7 22.4 53.6 | -0.503 | exp/ihm/nnet3_cleaned/lstm_bidirectional_sp/decode_eval/ascore_10/eval_hires.ctm.filt.sys

############################################

Expand Down
5 changes: 2 additions & 3 deletions egs/ami/s5b/RESULTS_sdm
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@
# xent BLSTM system; cleaned data and IHM alignments.
# local/nnet3/run_blstm.sh --mic sdm1 --use-ihm-ali true
# for d in exp/sdm1/nnet3_cleaned/lstm_bidirectional_sp/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
# Note: these results were obtained with ClipGradientComponent and may differ from results obtained with BackpropTruncationComponent
%WER 37.8 | 14633 94518 | 67.1 22.3 10.7 4.9 37.8 64.2 | 0.745 | exp/sdm1/nnet3_cleaned/lstm_bidirectional_sp_ihmali/decode_dev/ascore_11/dev_hires_o4.ctm.filt.sys
%WER 41.4 | 13809 89628 | 62.7 24.1 13.2 4.1 41.4 65.2 | 0.723 | exp/sdm1/nnet3_cleaned/lstm_bidirectional_sp_ihmali/decode_eval/ascore_11/eval_hires_o4.ctm.filt.sys
%WER 37.9 | 15953 94512 | 66.7 22.0 11.3 4.7 37.9 58.9 | 0.734 | exp/sdm1/nnet3_cleaned/lstm_bidirectional_sp_ihmali/decode_dev/ascore_12/dev_hires_o4.ctm.filt.sys
%WER 41.2 | 13271 89635 | 62.9 23.8 13.2 4.2 41.2 67.8 | 0.722 | exp/sdm1/nnet3_cleaned/lstm_bidirectional_sp_ihmali/decode_eval/ascore_11/eval_hires_o4.ctm.filt.sys

# =========================

Expand Down
16 changes: 8 additions & 8 deletions egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# clipping-threshold=30 [nnet3 LSTMs use a gradient clipping component at the recurrent connections.
# This is the threshold used to decide if clipping has to be activated ]
# zeroing-interval=20 [interval at which we (possibly) zero out the recurrent derivatives.]
# zeroing-threshold=20 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# zeroing-threshold=15 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# self-repair-scale-nonlinearity=1e-5 [It is a constant scaling the self-repair vector computed in derived classes of NonlinearComponent,
# i.e., SigmoidComponent, TanhComponent and RectifiedLinearComponent ]
# ng-per-element-scale-options='' [Additional options used for the diagonal matrices in the LSTM ]
Expand All @@ -45,7 +45,7 @@ def set_default_configs(self):
'ng-affine-options' : ' max-change=0.75 ',
'self-repair-scale-nonlinearity' : 0.00001,
'zeroing-interval' : 20,
'zeroing-threshold' : 3.0
'zeroing-threshold' : 15.0
}

def set_derived_configs(self):
Expand Down Expand Up @@ -226,7 +226,7 @@ def generate_lstm_config(self):
# clipping-threshold=30 [nnet3 LSTMs use a gradient clipping component at the recurrent connections.
# This is the threshold used to decide if clipping has to be activated ]
# zeroing-interval=20 [interval at which we (possibly) zero out the recurrent derivatives.]
# zeroing-threshold=20 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# zeroing-threshold=15 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# self-repair-scale-nonlinearity=1e-5 [It is a constant scaling the self-repair vector computed in derived classes of NonlinearComponent,
# i.e., SigmoidComponent, TanhComponent and RectifiedLinearComponent ]
# ng-per-element-scale-options='' [Additional options used for the diagonal matrices in the LSTM ]
Expand All @@ -249,7 +249,7 @@ def set_default_configs(self):
'ng-affine-options' : ' max-change=0.75 ',
'self-repair-scale-nonlinearity' : 0.00001,
'zeroing-interval' : 20,
'zeroing-threshold' : 3.0
'zeroing-threshold' : 15.0
}

def set_derived_configs(self):
Expand Down Expand Up @@ -568,7 +568,7 @@ def generate_lstm_config(self):
# clipping-threshold=30 [nnet3 LSTMs use a gradient clipping component at the recurrent connections.
# This is the threshold used to decide if clipping has to be activated ]
# zeroing-interval=20 [interval at which we (possibly) zero out the recurrent derivatives.]
# zeroing-threshold=20 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# zeroing-threshold=15 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# lstm-nonlinearity-options=' max-change=0.75 ' [Options string to pass into the LSTM nonlinearity component.]
# ng-affine-options=' max-change=1.5 ' [Additional options used for the full matrices in the LSTM, can be used to
# do things like set biases to initialize to 1]
Expand All @@ -582,7 +582,7 @@ def set_default_configs(self):
'cell-dim' : -1, # this is a compulsory argument
'clipping-threshold' : 30.0,
'zeroing-interval' : 20,
'zeroing-threshold' : 3.0,
'zeroing-threshold' : 15.0,
'delay' : -1,
# if you want to set 'self-repair-scale' (c.f. the
# self-repair-scale-nonlinearity config value in older LSTM layers), you can
Expand Down Expand Up @@ -719,7 +719,7 @@ def generate_lstm_config(self):
# clipping-threshold=30 [nnet3 LSTMs use a gradient clipping component at the recurrent connections.
# This is the threshold used to decide if clipping has to be activated ]
# zeroing-interval=20 [interval at which we (possibly) zero out the recurrent derivatives.]
# zeroing-threshold=20 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# zeroing-threshold=15 [We only zero out the derivs every zeroing-interval, if derivs exceed this value.]
# lstm-nonlinearity-options=' max-change=0.75 ' [Options string to pass into the LSTM nonlinearity component.]
# ng-affine-options=' max-change=1.5 ' [Additional options used for the full matrices in the LSTM, can be used to
# do things like set biases to initialize to 1]
Expand All @@ -744,7 +744,7 @@ def set_default_configs(self):
# larger max-change than the normal value of 0.75.
'ng-affine-options' : ' max-change=1.5',
'zeroing-interval' : 20,
'zeroing-threshold' : 3.0
'zeroing-threshold' : 15.0
}

def set_derived_configs(self):
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/steps/nnet3/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ def AddLstmLayer(config_lines,
name, input, cell_dim,
recurrent_projection_dim = 0,
non_recurrent_projection_dim = 0,
clipping_threshold = 1.0,
zeroing_threshold = 3.0,
clipping_threshold = 30.0,
zeroing_threshold = 15.0,
zeroing_interval = 20,
ng_per_element_scale_options = "",
ng_affine_options = "",
Expand Down
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/lstm/make_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def GetArgs():
"if clipping-threshold=0 no clipping is done", default=30)
parser.add_argument("--zeroing-threshold", type=float,
help="zeroing threshold used in BackpropTruncation components, "
"if zeroing-threshold=0 no periodic zeroing is done", default=3.0)
"if zeroing-threshold=0 no periodic zeroing is done", default=15.0)
parser.add_argument("--zeroing-interval", type=int,
help="zeroing interval used in BackpropTruncation components", default=20)
parser.add_argument("--self-repair-scale-nonlinearity", type=float,
Expand Down
4 changes: 2 additions & 2 deletions src/nnet3/nnet-general-component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -991,8 +991,8 @@ void BackpropTruncationComponent::Init(int32 dim,
void BackpropTruncationComponent::InitFromConfig(ConfigLine *cfl) {
int32 dim = 0;
bool ok = cfl->GetValue("dim", &dim);
BaseFloat clipping_threshold = 15.0;
BaseFloat zeroing_threshold = 2.0;
BaseFloat clipping_threshold = 30.0;
BaseFloat zeroing_threshold = 15.0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Larger values of these quantities are more dangerous, i.e. more likely to lead to instability.
I don't think it's sufficient to just test this on one setup, because it's the potential for divergence that this is supposed to guard against. Have you done any other tests?

int32 zeroing_interval = 20, recurrence_interval = 1;
cfl->GetValue("clipping-threshold", &clipping_threshold);
cfl->GetValue("zeroing-threshold", &zeroing_threshold);
Expand Down