diff --git a/egs/swbd/s5c/RESULTS b/egs/swbd/s5c/RESULTS index 6223c4ca319..1c1b63c2773 100644 --- a/egs/swbd/s5c/RESULTS +++ b/egs/swbd/s5c/RESULTS @@ -152,13 +152,23 @@ exit 0 %WER 19.4 | 2628 21594 | 82.7 12.0 5.3 2.1 19.4 54.9 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys %WER 20.8 | 2628 21594 | 81.3 13.1 5.6 2.2 20.8 56.9 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys -# bidirectional LSTM with the same configuration as the above experiment, plus self-repair of all nonliearities and clipgradient activated -%WER 10.4 | 1831 21395 | 90.5 6.2 3.3 0.9 10.4 44.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys -%WER 11.3 | 1831 21395 | 89.8 6.8 3.3 1.1 11.3 46.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys +# bidirectional LSTM with the same configuration as the above experiment, with self-repair of all nonlinearities and clipgradient, and max-change-per-component activated +%WER 14.9 | 4459 42989 | 86.7 9.0 4.3 1.6 14.9 50.5 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 15.9 | 4459 42989 | 85.7 9.8 4.5 1.7 15.9 52.3 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 10.2 | 1831 21395 | 90.8 6.1 3.2 1.0 10.2 44.4 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 11.2 | 1831 21395 | 89.9 6.8 3.3 1.1 11.2 46.6 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 19.4 | 2628 21594 | 82.7 11.8 5.4 2.2 19.4 54.5 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys +%WER 20.6 | 2628 21594 | 81.5 12.8 5.7 2.2 20.6 56.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.5/eval2000_hires.ctm.callhm.filt.sys + +( +# bidirectional LSTM with the same configuration as the above experiment, with self-repair of all nonlinearities and clipgradient activated %WER 15.0 | 4459 42989 | 86.5 9.1 4.5 1.5 15.0 50.4 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.filt.sys %WER 16.0 | 4459 42989 | 85.6 9.9 4.5 1.6 16.0 52.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 10.4 | 1831 21395 | 90.5 6.2 3.3 0.9 10.4 44.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 11.3 | 1831 21395 | 89.8 6.8 3.3 1.1 11.3 46.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys %WER 19.6 | 2628 21594 | 82.5 12.1 5.5 2.1 19.6 54.8 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys %WER 20.7 | 2628 21594 | 81.4 12.9 5.7 2.2 20.7 56.8 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys +) # results with nnet3 tdnn: local/nnet3/run_tdnn.sh (11.10.2015) (2 epoch training on speed-perturbed and volume perturbed data) %WER 12.1 | 1831 21395 | 89.1 7.1 3.8 1.3 12.1 48.1 | exp/nnet3/tdnn_sp/decode_eval2000_hires_sw1_fsh_fg/score_12_0.0/eval2000_hires.ctm.swbd.filt.sys diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh old mode
100644 new mode 100755 index 5379149c9bd..26cdaed29d7 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh @@ -130,6 +130,7 @@ if [ $stage -le 12 ]; then --recurrent-projection-dim 256 \ --non-recurrent-projection-dim 256 \ --label-delay $label_delay \ + --self-repair-scale-clipgradient 1.0 \ $dir/configs || exit 1; fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7d.sh old mode 100644 new mode 100755 diff --git a/egs/wsj/s5/steps/nnet3/components.py b/egs/wsj/s5/steps/nnet3/components.py index 9b9ce4a54ad..cf755a8d2ec 100644 --- a/egs/wsj/s5/steps/nnet3/components.py +++ b/egs/wsj/s5/steps/nnet3/components.py @@ -90,23 +90,29 @@ def AddPermuteLayer(config_lines, name, input, column_map): return {'descriptor': '{0}_permute'.format(name), 'dimension': input['dimension']} -def AddAffineLayer(config_lines, name, input, output_dim, ng_affine_options = ""): +def AddAffineLayer(config_lines, name, input, output_dim, ng_affine_options = "", max_change_per_component = 0.75): components = config_lines['components'] component_nodes = config_lines['component-nodes'] - components.append("component name={0}_affine type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, input['dimension'], output_dim, ng_affine_options)) + # Per-component max-change option + max_change_options = "max-change={0:.2f}".format(max_change_per_component) if max_change_per_component is not None else '' + + components.append("component name={0}_affine type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, input['dimension'], output_dim, ng_affine_options, max_change_options)) component_nodes.append("component-node name={0}_affine component={0}_affine input={1}".format(name, input['descriptor'])) return {'descriptor': '{0}_affine'.format(name), 'dimension': output_dim} -def AddAffRelNormLayer(config_lines, name, input, output_dim, ng_affine_options = " bias-stddev=0 ", norm_target_rms = 1.0, self_repair_scale = None): +def AddAffRelNormLayer(config_lines, name, input, output_dim, ng_affine_options = " bias-stddev=0 ", norm_target_rms = 1.0, self_repair_scale = None, max_change_per_component = 0.75): components = config_lines['components'] component_nodes = config_lines['component-nodes'] # self_repair_scale is a constant scaling the self-repair vector computed in RectifiedLinearComponent self_repair_string = "self-repair-scale={0:.10f}".format(self_repair_scale) if self_repair_scale is not None else '' - components.append("component name={0}_affine type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, input['dimension'], output_dim, ng_affine_options)) + # Per-component max-change option + max_change_options = "max-change={0:.2f}".format(max_change_per_component) if max_change_per_component is not None else '' + + components.append("component name={0}_affine type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, input['dimension'], output_dim, ng_affine_options, max_change_options)) components.append("component name={0}_relu type=RectifiedLinearComponent dim={1} {2}".format(name, output_dim, self_repair_string)) components.append("component name={0}_renorm type=NormalizeComponent dim={1} target-rms={2}".format(name, output_dim, norm_target_rms)) @@ -244,6 +250,7 @@ def AddOutputLayer(config_lines, input, label_delay = None, suffix=None, objecti def AddFinalLayer(config_lines, input, output_dim, 
ng_affine_options = " param-stddev=0 bias-stddev=0 ", + max_change_per_component = 1.5, label_delay=None, use_presoftmax_prior_scale = False, prior_scale_file = None, @@ -261,7 +268,7 @@ def AddFinalLayer(config_lines, input, output_dim, prev_layer_output = AddAffineLayer(config_lines, final_node_prefix , input, output_dim, - ng_affine_options) + ng_affine_options, max_change_per_component) if include_log_softmax: if use_presoftmax_prior_scale : components.append('component name={0}-fixed-scale type=FixedScaleComponent scales={1}'.format(final_node_prefix, prior_scale_file)) @@ -288,7 +295,8 @@ def AddLstmLayer(config_lines, ng_affine_options = "", lstm_delay = -1, self_repair_scale_nonlinearity = None, - self_repair_scale_clipgradient = None): + self_repair_scale_clipgradient = None, + max_change_per_component = 0.75): assert(recurrent_projection_dim >= 0 and non_recurrent_projection_dim >= 0) components = config_lines['components'] component_nodes = config_lines['component-nodes'] @@ -316,24 +324,26 @@ def AddLstmLayer(config_lines, self_repair_clipgradient_string = "self-repair-scale={0:.2f}".format(self_repair_scale_clipgradient) if self_repair_scale_clipgradient is not None else '' # Natural gradient per element scale parameters ng_per_element_scale_options += " param-mean=0.0 param-stddev=1.0 " + # Per-component max-change option + max_change_options = "max-change={0:.2f}".format(max_change_per_component) if max_change_per_component is not None else '' # Parameter Definitions W*(* replaced by - to have valid names) components.append("# Input gate control : W_i* matrices") - components.append("component name={0}_W_i-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, input_dim + recurrent_projection_dim, cell_dim, ng_affine_options)) + components.append("component name={0}_W_i-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, input_dim + recurrent_projection_dim, cell_dim, ng_affine_options, max_change_options)) components.append("# note : the cell outputs pass through a diagonal matrix") - components.append("component name={0}_w_ic type=NaturalGradientPerElementScaleComponent dim={1} {2}".format(name, cell_dim, ng_per_element_scale_options)) + components.append("component name={0}_w_ic type=NaturalGradientPerElementScaleComponent dim={1} {2} {3}".format(name, cell_dim, ng_per_element_scale_options, max_change_options)) components.append("# Forget gate control : W_f* matrices") - components.append("component name={0}_W_f-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, input_dim + recurrent_projection_dim, cell_dim, ng_affine_options)) + components.append("component name={0}_W_f-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, input_dim + recurrent_projection_dim, cell_dim, ng_affine_options, max_change_options)) components.append("# note : the cell outputs pass through a diagonal matrix") - components.append("component name={0}_w_fc type=NaturalGradientPerElementScaleComponent dim={1} {2}".format(name, cell_dim, ng_per_element_scale_options)) + components.append("component name={0}_w_fc type=NaturalGradientPerElementScaleComponent dim={1} {2} {3}".format(name, cell_dim, ng_per_element_scale_options, max_change_options)) components.append("# Output gate control : W_o* matrices") - components.append("component name={0}_W_o-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, input_dim + 
recurrent_projection_dim, cell_dim, ng_affine_options)) + components.append("component name={0}_W_o-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, input_dim + recurrent_projection_dim, cell_dim, ng_affine_options, max_change_options)) components.append("# note : the cell outputs pass through a diagonal matrix") - components.append("component name={0}_w_oc type=NaturalGradientPerElementScaleComponent dim={1} {2}".format(name, cell_dim, ng_per_element_scale_options)) + components.append("component name={0}_w_oc type=NaturalGradientPerElementScaleComponent dim={1} {2} {3}".format(name, cell_dim, ng_per_element_scale_options, max_change_options)) components.append("# Cell input matrices : W_c* matrices") - components.append("component name={0}_W_c-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, input_dim + recurrent_projection_dim, cell_dim, ng_affine_options)) + components.append("component name={0}_W_c-xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, input_dim + recurrent_projection_dim, cell_dim, ng_affine_options, max_change_options)) components.append("# Defining the non-linearities") @@ -385,7 +395,7 @@ def AddLstmLayer(config_lines, # add the recurrent connections if (add_recurrent_projection and add_non_recurrent_projection): components.append("# projection matrices : Wrm and Wpm") - components.append("component name={0}_W-m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, recurrent_projection_dim + non_recurrent_projection_dim, ng_affine_options)) + components.append("component name={0}_W-m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, cell_dim, recurrent_projection_dim + non_recurrent_projection_dim, ng_affine_options, max_change_options)) components.append("component name={0}_r type=ClipGradientComponent dim={1} clipping-threshold={2} norm-based-clipping={3} {4}".format(name, recurrent_projection_dim, clipping_threshold, norm_based_clipping, self_repair_clipgradient_string)) component_nodes.append("# r_t and p_t") component_nodes.append("component-node name={0}_rp_t component={0}_W-m input={0}_m_t".format(name)) @@ -396,7 +406,7 @@ def AddLstmLayer(config_lines, elif add_recurrent_projection: components.append("# projection matrices : Wrm") - components.append("component name={0}_Wrm type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, recurrent_projection_dim, ng_affine_options)) + components.append("component name={0}_Wrm type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3} {4}".format(name, cell_dim, recurrent_projection_dim, ng_affine_options, max_change_options)) components.append("component name={0}_r type=ClipGradientComponent dim={1} clipping-threshold={2} norm-based-clipping={3} {4}".format(name, recurrent_projection_dim, clipping_threshold, norm_based_clipping, self_repair_clipgradient_string)) component_nodes.append("# r_t") component_nodes.append("component-node name={0}_r_t_preclip component={0}_Wrm input={0}_m_t".format(name)) @@ -425,7 +435,8 @@ def AddBLstmLayer(config_lines, ng_affine_options = "", lstm_delay = [-1,1], self_repair_scale_nonlinearity = None, - self_repair_scale_clipgradient = None): + self_repair_scale_clipgradient = None, + max_change_per_component = 0.75): assert(len(lstm_delay) == 2 and lstm_delay[0] < 0 and lstm_delay[1] > 0) output_forward = AddLstmLayer(config_lines, 
"{0}_forward".format(name), input, cell_dim, recurrent_projection_dim, non_recurrent_projection_dim, @@ -433,14 +444,16 @@ def AddBLstmLayer(config_lines, ng_per_element_scale_options, ng_affine_options, lstm_delay = lstm_delay[0], self_repair_scale_nonlinearity = self_repair_scale_nonlinearity, - self_repair_scale_clipgradient = self_repair_scale_clipgradient) + self_repair_scale_clipgradient = self_repair_scale_clipgradient, + max_change_per_component = max_change_per_component) output_backward = AddLstmLayer(config_lines, "{0}_backward".format(name), input, cell_dim, recurrent_projection_dim, non_recurrent_projection_dim, clipping_threshold, norm_based_clipping, ng_per_element_scale_options, ng_affine_options, lstm_delay = lstm_delay[1], self_repair_scale_nonlinearity = self_repair_scale_nonlinearity, - self_repair_scale_clipgradient = self_repair_scale_clipgradient) + self_repair_scale_clipgradient = self_repair_scale_clipgradient, + max_change_per_component = max_change_per_component) output_descriptor = 'Append({0}, {1})'.format(output_forward['descriptor'], output_backward['descriptor']) output_dim = output_forward['dimension'] + output_backward['dimension'] diff --git a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py index 53739f0f9ce..8e6e3d8e0e2 100755 --- a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py @@ -50,6 +50,12 @@ def GetArgs(): default=0.0) parser.add_argument("--include-log-softmax", type=str, action=nnet3_train_lib.StrToBoolAction, help="add the final softmax layer ", default=True, choices = ["false", "true"]) + parser.add_argument("--max-change-per-component", type=float, + help="Enforces per-component max change (except for the final affine layer). " + "if 0 it would not be enforced.", default=0.75) + parser.add_argument("--max-change-per-component-final", type=float, + help="Enforces per-component max change for the final affine layer. 
" + "if 0 it would not be enforced.", default=1.5) # LSTM options parser.add_argument("--num-lstm-layers", type=int, @@ -122,6 +128,9 @@ def CheckArgs(args): if not args.ivector_dim >= 0: raise Exception("ivector-dim has to be non-negative") + if not args.max_change_per_component >= 0 or not args.max_change_per_component_final >= 0: + raise Exception("max-change-per-component and max_change-per-component-final should be non-negative") + if (args.num_lstm_layers < 1): sys.exit("--num-lstm-layers has to be a positive integer") if (args.clipping_threshold < 0): @@ -215,7 +224,8 @@ def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets, norm_based_clipping, clipping_threshold, ng_per_element_scale_options, ng_affine_options, label_delay, include_log_softmax, xent_regularize, - self_repair_scale_nonlinearity, self_repair_scale_clipgradient): + self_repair_scale_nonlinearity, self_repair_scale_clipgradient, + max_change_per_component, max_change_per_component_final): config_lines = {'components':[], 'component-nodes':[]} @@ -238,22 +248,27 @@ def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets, recurrent_projection_dim, non_recurrent_projection_dim, clipping_threshold, norm_based_clipping, ng_per_element_scale_options, ng_affine_options, - lstm_delay = lstm_delay[i], self_repair_scale_nonlinearity = self_repair_scale_nonlinearity, self_repair_scale_clipgradient = self_repair_scale_clipgradient) + lstm_delay = lstm_delay[i], + self_repair_scale_nonlinearity = self_repair_scale_nonlinearity, self_repair_scale_clipgradient = self_repair_scale_clipgradient, + max_change_per_component = max_change_per_component) else: # add a uni-directional LSTM layer prev_layer_output = nodes.AddLstmLayer(config_lines, "Lstm{0}".format(i+1), prev_layer_output, cell_dim, recurrent_projection_dim, non_recurrent_projection_dim, clipping_threshold, norm_based_clipping, ng_per_element_scale_options, ng_affine_options, - lstm_delay = lstm_delay[i][0], self_repair_scale_nonlinearity = self_repair_scale_nonlinearity, self_repair_scale_clipgradient = self_repair_scale_clipgradient) + lstm_delay = lstm_delay[i][0], + self_repair_scale_nonlinearity = self_repair_scale_nonlinearity, self_repair_scale_clipgradient = self_repair_scale_clipgradient, + max_change_per_component = max_change_per_component) # make the intermediate config file for layerwise discriminative # training - nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, label_delay = label_delay, include_log_softmax = include_log_softmax) + nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, max_change_per_component = max_change_per_component_final, label_delay = label_delay, include_log_softmax = include_log_softmax) if xent_regularize != 0.0: nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, include_log_softmax = True, label_delay = label_delay, + max_change_per_component = max_change_per_component_final, name_affix = 'xent') config_files['{0}/layer{1}.config'.format(config_dir, i+1)] = config_lines @@ -262,14 +277,15 @@ def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets, for i in range(num_lstm_layers, num_hidden_layers): prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "L{0}".format(i+1), prev_layer_output, hidden_dim, - ng_affine_options, self_repair_scale = self_repair_scale_nonlinearity) + ng_affine_options, self_repair_scale = self_repair_scale_nonlinearity, max_change_per_component = max_change_per_component) # make the intermediate config 
file for layerwise discriminative # training - nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, label_delay = label_delay, include_log_softmax = include_log_softmax) + nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, max_change_per_component = max_change_per_component_final, label_delay = label_delay, include_log_softmax = include_log_softmax) if xent_regularize != 0.0: nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, include_log_softmax = True, label_delay = label_delay, + max_change_per_component = max_change_per_component_final, name_affix = 'xent') config_files['{0}/layer{1}.config'.format(config_dir, i+1)] = config_lines @@ -326,7 +342,9 @@ def Main(): include_log_softmax = args.include_log_softmax, xent_regularize = args.xent_regularize, self_repair_scale_nonlinearity = args.self_repair_scale_nonlinearity, - self_repair_scale_clipgradient = args.self_repair_scale_clipgradient) + self_repair_scale_clipgradient = args.self_repair_scale_clipgradient, + max_change_per_component = args.max_change_per_component, + max_change_per_component_final = args.max_change_per_component_final) if __name__ == "__main__": Main() diff --git a/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py b/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py index bac260e93bc..60b291c9481 100755 --- a/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py @@ -95,6 +95,12 @@ def GetArgs(): parser.add_argument("--final-layer-normalize-target", type=float, help="RMS target for final layer (set to <1 if final layer learns too fast", default=1.0) + parser.add_argument("--max-change-per-component", type=float, + help="Enforces per-component max change (except for the final affine layer). " + "If set to 0, it will not be enforced.", default=0.75) + parser.add_argument("--max-change-per-component-final", type=float, + help="Enforces per-component max change for the final affine layer. 
" + "If set to 0, it will not be enforced.", default=1.5) parser.add_argument("--subset-dim", type=int, default=0, help="dimension of the subset of units to be sent to the central frame") parser.add_argument("--pnorm-input-dim", type=int, @@ -204,6 +210,9 @@ def CheckArgs(args): args.add_lda = False warnings.warn("--add-lda is set to false as CNN layers are used.") + if not args.max_change_per_component >= 0 or not args.max_change_per_component_final >= 0: + raise Exception("max-change-per-component and max-change-per-component-final should be non-negative") + return args def AddConvMaxpLayer(config_lines, name, input, args): @@ -333,6 +342,7 @@ def MakeConfigs(config_dir, splice_indexes_string, xent_regularize, xent_separate_forward_affine, self_repair_scale, + max_change_per_component, max_change_per_component_final, objective_type): parsed_splice_output = ParseSpliceString(splice_indexes_string.strip()) @@ -426,13 +436,15 @@ def MakeConfigs(config_dir, splice_indexes_string, if nonlin_type == "relu" : prev_layer_output_chain = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_chain", prev_layer_output, nonlin_output_dim, + norm_target_rms = final_layer_normalize_target, self_repair_scale = self_repair_scale, - norm_target_rms = final_layer_normalize_target) + max_change_per_component = max_change_per_component) prev_layer_output_xent = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_xent", prev_layer_output, nonlin_output_dim, + norm_target_rms = final_layer_normalize_target, self_repair_scale = self_repair_scale, - norm_target_rms = final_layer_normalize_target) + max_change_per_component = max_change_per_component) elif nonlin_type == "pnorm" : prev_layer_output_chain = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_chain", prev_layer_output, nonlin_input_dim, nonlin_output_dim, @@ -445,6 +457,7 @@ def MakeConfigs(config_dir, splice_indexes_string, raise Exception("Unknown nonlinearity type") nodes.AddFinalLayer(config_lines, prev_layer_output_chain, num_targets, + max_change_per_component = max_change_per_component_final, use_presoftmax_prior_scale = use_presoftmax_prior_scale, prior_scale_file = prior_scale_file, include_log_softmax = include_log_softmax) @@ -452,6 +465,7 @@ def MakeConfigs(config_dir, splice_indexes_string, nodes.AddFinalLayer(config_lines, prev_layer_output_xent, num_targets, ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( 0.5 / xent_regularize), + max_change_per_component = max_change_per_component_final, use_presoftmax_prior_scale = use_presoftmax_prior_scale, prior_scale_file = prior_scale_file, include_log_softmax = True, @@ -460,8 +474,9 @@ def MakeConfigs(config_dir, splice_indexes_string, if nonlin_type == "relu": prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "Tdnn_{0}".format(i), prev_layer_output, nonlin_output_dims[i], + norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target, self_repair_scale = self_repair_scale, - norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + max_change_per_component = max_change_per_component) elif nonlin_type == "pnorm": prev_layer_output = nodes.AddAffPnormLayer(config_lines, "Tdnn_{0}".format(i), prev_layer_output, nonlin_input_dim, nonlin_output_dim, @@ -478,6 +493,7 @@ def MakeConfigs(config_dir, splice_indexes_string, # Usually used with an objective-type such as "quadratic". # Applications are k-binary classification such Ideal Ratio Mask prediction. 
nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, + max_change_per_component = max_change_per_component_final, use_presoftmax_prior_scale = use_presoftmax_prior_scale, prior_scale_file = prior_scale_file, include_log_softmax = include_log_softmax, @@ -487,6 +503,7 @@ def MakeConfigs(config_dir, splice_indexes_string, nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( 0.5 / xent_regularize), + max_change_per_component = max_change_per_component_final, use_presoftmax_prior_scale = use_presoftmax_prior_scale, prior_scale_file = prior_scale_file, include_log_softmax = True, @@ -538,6 +555,8 @@ def Main(): xent_regularize = args.xent_regularize, xent_separate_forward_affine = args.xent_separate_forward_affine, self_repair_scale = args.self_repair_scale_nonlinearity, + max_change_per_component = args.max_change_per_component, + max_change_per_component_final = args.max_change_per_component_final, objective_type = args.objective_type) if __name__ == "__main__": diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index 1da521eebd3..c8dfff9e92a 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -34,17 +34,16 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts, num_minibatches_processed_(0) { if (opts.nnet_config.zero_component_stats) ZeroComponentStats(nnet); - if (opts.nnet_config.momentum == 0.0 && - opts.nnet_config.max_param_change == 0.0) { - delta_nnet_= NULL; - } else { - KALDI_ASSERT(opts.nnet_config.momentum >= 0.0 && - opts.nnet_config.max_param_change >= 0.0); - delta_nnet_ = nnet_->Copy(); - bool is_gradient = false; // setting this to true would disable the - // natural-gradient updates. - SetZero(is_gradient, delta_nnet_); - } + KALDI_ASSERT(opts.nnet_config.momentum >= 0.0 && + opts.nnet_config.max_param_change >= 0.0); + delta_nnet_ = nnet_->Copy(); + bool is_gradient = false; // setting this to true would disable the + // natural-gradient updates. + SetZero(is_gradient, delta_nnet_); + const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); + num_max_change_per_component_applied_.resize(num_updatable, 0); + num_max_change_global_applied_ = 0; + if (opts.nnet_config.read_cache != "") { bool binary; try { @@ -71,8 +70,7 @@ void NnetChainTrainer::Train(const NnetChainExample &chain_eg) { const NnetComputation *computation = compiler_.Compile(request); NnetComputer computer(nnet_config.compute_config, *computation, - *nnet_, - (delta_nnet_ == NULL ? nnet_ : delta_nnet_)); + *nnet_, delta_nnet_); // give the inputs to the computer object. 
computer.AcceptInputs(*nnet_, chain_eg.inputs); computer.Forward(); @@ -80,27 +78,7 @@ void NnetChainTrainer::Train(const NnetChainExample &chain_eg) { this->ProcessOutputs(chain_eg, &computer); computer.Backward(); - if (delta_nnet_ != NULL) { - BaseFloat scale = (1.0 - nnet_config.momentum); - if (nnet_config.max_param_change != 0.0) { - BaseFloat param_delta = - std::sqrt(DotProduct(*delta_nnet_, *delta_nnet_)) * scale; - if (param_delta > nnet_config.max_param_change) { - if (param_delta - param_delta != 0.0) { - KALDI_WARN << "Infinite parameter change, will not apply."; - SetZero(false, delta_nnet_); - } else { - scale *= nnet_config.max_param_change / param_delta; - KALDI_LOG << "Parameter change too big: " << param_delta << " > " - << "--max-param-change=" << nnet_config.max_param_change - << ", scaling by " - << nnet_config.max_param_change / param_delta; - } - } - } - AddNnet(*delta_nnet_, scale, nnet_); - ScaleNnet(nnet_config.momentum, delta_nnet_); - } + UpdateParamsWithMaxChange(); } @@ -169,6 +147,88 @@ void NnetChainTrainer::ProcessOutputs(const NnetChainExample &eg, } } +void NnetChainTrainer::UpdateParamsWithMaxChange() { + KALDI_ASSERT(delta_nnet_ != NULL); + const NnetTrainerOptions &nnet_config = opts_.nnet_config; + // computes scaling factors for per-component max-change + const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); + Vector<BaseFloat> scale_factors = Vector<BaseFloat>(num_updatable); + BaseFloat param_delta_squared = 0.0; + int32 num_max_change_per_component_applied_per_minibatch = 0; + BaseFloat min_scale = 1.0; + std::string component_name_with_min_scale; + BaseFloat max_change_with_min_scale; + int32 i = 0; + for (int32 c = 0; c < delta_nnet_->NumComponents(); c++) { + Component *comp = delta_nnet_->GetComponent(c); + if (comp->Properties() & kUpdatableComponent) { + UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(comp); + if (uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + << "UpdatableComponent; change this code."; + BaseFloat max_param_change_per_comp = uc->MaxChange(); + KALDI_ASSERT(max_param_change_per_comp >= 0.0); + BaseFloat dot_prod = uc->DotProduct(*uc); + if (max_param_change_per_comp != 0.0 && + std::sqrt(dot_prod) > max_param_change_per_comp) { + scale_factors(i) = max_param_change_per_comp / std::sqrt(dot_prod); + num_max_change_per_component_applied_[i]++; + num_max_change_per_component_applied_per_minibatch++; + KALDI_VLOG(2) << "Parameters in " << delta_nnet_->GetComponentName(c) + << " change too big: " << std::sqrt(dot_prod) << " > " + << "max-change=" << max_param_change_per_comp + << ", scaling by " << scale_factors(i); + } else { + scale_factors(i) = 1.0; + } + if (i == 0 || scale_factors(i) < min_scale) { + min_scale = scale_factors(i); + component_name_with_min_scale = delta_nnet_->GetComponentName(c); + max_change_with_min_scale = max_param_change_per_comp; + } + param_delta_squared += std::pow(scale_factors(i), 2.0) * dot_prod; + i++; + } + } + KALDI_ASSERT(i == scale_factors.Dim()); + BaseFloat param_delta = std::sqrt(param_delta_squared); + // computes the scale for global max-change (with momentum) + BaseFloat scale = (1.0 - nnet_config.momentum); + if (nnet_config.max_param_change != 0.0) { + param_delta *= scale; + if (param_delta > nnet_config.max_param_change) { + if (param_delta - param_delta != 0.0) { + KALDI_WARN << "Infinite parameter change, will not apply."; + SetZero(false, delta_nnet_); + } else { + scale *= nnet_config.max_param_change / param_delta; + num_max_change_global_applied_++; + } + } + } + 
if ((nnet_config.max_param_change != 0.0 && + param_delta > nnet_config.max_param_change && + param_delta - param_delta == 0.0) || min_scale < 1.0) { + std::ostringstream ostr; + if (min_scale < 1.0) + ostr << "Per-component max-change active on " + << num_max_change_per_component_applied_per_minibatch + << " / " << num_updatable << " Updatable Components. " + << "(smallest factor=" << min_scale << " on " + << component_name_with_min_scale + << " with max-change=" << max_change_with_min_scale <<"). "; + if (param_delta > nnet_config.max_param_change) + ostr << "Global max-change factor was " + << nnet_config.max_param_change / param_delta + << " with max-change=" << nnet_config.max_param_change << "."; + KALDI_LOG << ostr.str(); + } + // applies both of the max-change scalings all at once, component by component + // and updates parameters + scale_factors.Scale(scale); + AddNnetComponents(*delta_nnet_, scale_factors, scale, nnet_); + ScaleNnet(nnet_config.momentum, delta_nnet_); +} bool NnetChainTrainer::PrintTotalStats() const { unordered_map<std::string, ObjectiveFunctionInfo, StringHasher>::const_iterator @@ -183,6 +243,29 @@ bool NnetChainTrainer::PrintTotalStats() const { return ans; } +void NnetChainTrainer::PrintMaxChangeStats() const { + KALDI_ASSERT(delta_nnet_ != NULL); + int32 i = 0; + for (int32 c = 0; c < delta_nnet_->NumComponents(); c++) { + Component *comp = delta_nnet_->GetComponent(c); + if (comp->Properties() & kUpdatableComponent) { + UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(comp); + if (uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + << "UpdatableComponent; change this code."; + if (num_max_change_per_component_applied_[i] > 0) + KALDI_LOG << "For " << delta_nnet_->GetComponentName(c) + << ", per-component max-change was enforced " + << (100.0 * num_max_change_per_component_applied_[i]) / + num_minibatches_processed_ << " \% of the time."; + i++; + } + } + if (num_max_change_global_applied_ > 0) + KALDI_LOG << "The global max-change was enforced " + << (100.0 * num_max_change_global_applied_) / + num_minibatches_processed_ << " \% of the time."; +} NnetChainTrainer::~NnetChainTrainer() { if (opts_.nnet_config.write_cache != "") { diff --git a/src/nnet3/nnet-chain-training.h b/src/nnet3/nnet-chain-training.h index a4810fe16c6..4a3d84353d9 100644 --- a/src/nnet3/nnet-chain-training.h +++ b/src/nnet3/nnet-chain-training.h @@ -64,11 +64,20 @@ class NnetChainTrainer { // Prints out the final stats, and return true if there was a nonzero count. bool PrintTotalStats() const; + // Prints out the max-change stats (if nonzero): the percentage of time that + // per-component max-change and global max-change were enforced. + void PrintMaxChangeStats() const; + ~NnetChainTrainer(); private: void ProcessOutputs(const NnetChainExample &eg, NnetComputer *computer); + // Applies per-component max-change and global max-change to all updatable + // components in *delta_nnet_, and uses *delta_nnet_ to update parameters + // in *nnet_. + void UpdateParamsWithMaxChange(); + const NnetChainTrainingOptions opts_; chain::DenominatorGraph den_graph_; @@ -85,6 +94,10 @@ class NnetChainTrainer { // So we store the objective functions per output layer. int32 num_minibatches_processed_; + // stats for max-change. 
+ std::vector<int32> num_max_change_per_component_applied_; + int32 num_max_change_global_applied_; + unordered_map<std::string, ObjectiveFunctionInfo, StringHasher> objf_info_; }; diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc index 168a2a5350a..cfc28242156 100644 --- a/src/nnet3/nnet-component-itf.cc +++ b/src/nnet3/nnet-component-itf.cc @@ -183,7 +183,9 @@ bool Component::IsComputable(const MiscComputationInfo &misc_info, void UpdatableComponent::InitLearningRatesFromConfig(ConfigLine *cfl) { cfl->GetValue("learning-rate", &learning_rate_); cfl->GetValue("learning-rate-factor", &learning_rate_factor_); - if (learning_rate_ < 0.0 || learning_rate_factor_ < 0.0) + max_change_ = 0.0; + cfl->GetValue("max-change", &max_change_); + if (learning_rate_ < 0.0 || learning_rate_factor_ < 0.0 || max_change_ < 0.0) KALDI_ERR << "Bad initializer " << cfl->WholeLine(); } @@ -210,6 +212,12 @@ void UpdatableComponent::ReadUpdatableCommon(std::istream &is, bool binary) { } else { is_gradient_ = false; } + if (token == "<MaxChange>") { + ReadBasicType(is, binary, &max_change_); + ReadToken(is, binary, &token); + } else { + max_change_ = 0.0; + } if (token == "<LearningRate>") { ReadBasicType(is, binary, &learning_rate_); } else { @@ -232,6 +240,10 @@ void UpdatableComponent::WriteUpdatableCommon(std::ostream &os, WriteToken(os, binary, "<IsGradient>"); WriteBasicType(os, binary, is_gradient_); } + if (max_change_ > 0.0) { + WriteToken(os, binary, "<MaxChange>"); + WriteBasicType(os, binary, max_change_); + } WriteToken(os, binary, "<LearningRate>"); WriteBasicType(os, binary, learning_rate_); } @@ -246,6 +258,8 @@ std::string UpdatableComponent::Info() const { stream << ", is-gradient=true"; if (learning_rate_factor_ != 1.0) stream << ", learning-rate-factor=" << learning_rate_factor_; + if (max_change_ > 0.0) + stream << ", max-change=" << max_change_; return stream.str(); } diff --git a/src/nnet3/nnet-component-itf.h b/src/nnet3/nnet-component-itf.h index 164f9d056e7..e1391630c9f 100644 --- a/src/nnet3/nnet-component-itf.h +++ b/src/nnet3/nnet-component-itf.h @@ -373,7 +373,7 @@ class UpdatableComponent: public Component { UpdatableComponent(const UpdatableComponent &other): learning_rate_(other.learning_rate_), learning_rate_factor_(other.learning_rate_factor_), - is_gradient_(other.is_gradient_) { } + is_gradient_(other.is_gradient_), max_change_(other.max_change_) { } /// \brief Sets parameters to zero, and if treat_as_gradient is true, /// sets is_gradient_ to true and sets learning_rate_ to 1, ignoring @@ -381,7 +381,7 @@ virtual void SetZero(bool treat_as_gradient) = 0; UpdatableComponent(): learning_rate_(0.001), learning_rate_factor_(1.0), - is_gradient_(false) { } + is_gradient_(false), max_change_(0.0) { } virtual ~UpdatableComponent() { } @@ -408,6 +408,12 @@ class UpdatableComponent: public Component { /// a different value than x will returned. BaseFloat LearningRate() const { return learning_rate_; } + /// Gets per-component max-change value. Note: the components themselves do + /// not enforce the per-component max-change; it's enforced in class + /// NnetTrainer by querying the max-changes for each component. + /// See NnetTrainer::UpdateParamsWithMaxChange() in nnet3/nnet-training.cc. + BaseFloat MaxChange() const { return max_change_; } + virtual std::string Info() const; /// The following new virtual function returns the total dimension of @@ -446,6 +452,7 @@ ///< than as parameters. 
Its main effect is that we disable ///< any natural-gradient update and just compute the standard ///< gradient. + BaseFloat max_change_; ///< configuration value for imposing max-change private: const UpdatableComponent &operator = (const UpdatableComponent &other); // Disallow. diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index 6940ba8302a..ba352af19be 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -2671,9 +2671,10 @@ void NaturalGradientAffineComponent::Init( SetNaturalGradientConfigs(); if (max_change_per_sample > 0.0) KALDI_WARN << "You are setting a positive max_change_per_sample for " - << "NaturalGradientAffineComponent. But the per-component " - << "gradient clipping mechansim has been removed. Instead it's currently " - << "done at the whole model level."; + << "NaturalGradientAffineComponent. But it has been deprecated. " + << "Please use max_change for all updatable components instead " + << "to activate the per-component max change mechanism."; + KALDI_ASSERT(max_change_per_sample >= 0.0); max_change_per_sample_ = max_change_per_sample; is_gradient_ = false; // not configurable; there's no reason you'd want this update_count_ = 0.0; @@ -3366,9 +3367,9 @@ void NaturalGradientPerElementScaleComponent::Init( max_change_per_minibatch_ = max_change_per_minibatch; if (max_change_per_minibatch > 0.0) KALDI_WARN << "You are setting a positive max_change_per_minibatch for " - << "NaturalGradientPerElementScaleComponent. But the per-component " - << "gradient clipping mechansim has been removed. Instead it's currently " - << "done at the whole model level."; + << "NaturalGradientPerElementScaleComponent. But it has been deprecated. " + << "Please use max_change for all updatable components instead " + << "to activate the per-component max change mechanism."; } void NaturalGradientPerElementScaleComponent::Init( diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index 037bc45013b..7251e4bc54b 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -32,16 +32,16 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, num_minibatches_processed_(0) { if (config.zero_component_stats) ZeroComponentStats(nnet); - if (config.momentum == 0.0 && config.max_param_change == 0.0) { - delta_nnet_= NULL; - } else { - KALDI_ASSERT(config.momentum >= 0.0 && - config.max_param_change >= 0.0); - delta_nnet_ = nnet_->Copy(); - bool is_gradient = false; // setting this to true would disable the - // natural-gradient updates. - SetZero(is_gradient, delta_nnet_); - } + KALDI_ASSERT(config.momentum >= 0.0 && + config.max_param_change >= 0.0); + delta_nnet_ = nnet_->Copy(); + bool is_gradient = false; // setting this to true would disable the + // natural-gradient updates. + SetZero(is_gradient, delta_nnet_); + const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); + num_max_change_per_component_applied_.resize(num_updatable, 0); + num_max_change_global_applied_ = 0; + if (config_.read_cache != "") { bool binary; try { @@ -65,8 +65,7 @@ void NnetTrainer::Train(const NnetExample &eg) { const NnetComputation *computation = compiler_.Compile(request); NnetComputer computer(config_.compute_config, *computation, - *nnet_, - (delta_nnet_ == NULL ? nnet_ : delta_nnet_)); + *nnet_, delta_nnet_); // give the inputs to the computer object. 
computer.AcceptInputs(*nnet_, eg.io); computer.Forward(); @@ -74,26 +73,7 @@ void NnetTrainer::Train(const NnetExample &eg) { this->ProcessOutputs(eg, &computer); computer.Backward(); - if (delta_nnet_ != NULL) { - BaseFloat scale = (1.0 - config_.momentum); - if (config_.max_param_change != 0.0) { - BaseFloat param_delta = - std::sqrt(DotProduct(*delta_nnet_, *delta_nnet_)) * scale; - if (param_delta > config_.max_param_change) { - if (param_delta - param_delta != 0.0) { - KALDI_WARN << "Infinite parameter change, will not apply."; - SetZero(false, delta_nnet_); - } else { - scale *= config_.max_param_change / param_delta; - KALDI_LOG << "Parameter change too big: " << param_delta << " > " - << "--max-param-change=" << config_.max_param_change - << ", scaling by " << config_.max_param_change / param_delta; - } - } - } - AddNnet(*delta_nnet_, scale, nnet_); - ScaleNnet(config_.momentum, delta_nnet_); - } + UpdateParamsWithMaxChange(); } void NnetTrainer::ProcessOutputs(const NnetExample &eg, @@ -118,6 +98,88 @@ void NnetTrainer::ProcessOutputs(const NnetExample &eg, } } +void NnetTrainer::UpdateParamsWithMaxChange() { + KALDI_ASSERT(delta_nnet_ != NULL); + // computes scaling factors for per-component max-change + const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); + Vector<BaseFloat> scale_factors = Vector<BaseFloat>(num_updatable); + BaseFloat param_delta_squared = 0.0; + int32 num_max_change_per_component_applied_per_minibatch = 0; + BaseFloat min_scale = 1.0; + std::string component_name_with_min_scale; + BaseFloat max_change_with_min_scale; + int32 i = 0; + for (int32 c = 0; c < delta_nnet_->NumComponents(); c++) { + Component *comp = delta_nnet_->GetComponent(c); + if (comp->Properties() & kUpdatableComponent) { + UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(comp); + if (uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + << "UpdatableComponent; change this code."; + BaseFloat max_param_change_per_comp = uc->MaxChange(); + KALDI_ASSERT(max_param_change_per_comp >= 0.0); + BaseFloat dot_prod = uc->DotProduct(*uc); + if (max_param_change_per_comp != 0.0 && + std::sqrt(dot_prod) > max_param_change_per_comp) { + scale_factors(i) = max_param_change_per_comp / std::sqrt(dot_prod); + num_max_change_per_component_applied_[i]++; + num_max_change_per_component_applied_per_minibatch++; + KALDI_VLOG(2) << "Parameters in " << delta_nnet_->GetComponentName(c) + << " change too big: " << std::sqrt(dot_prod) << " > " + << "max-change=" << max_param_change_per_comp + << ", scaling by " << scale_factors(i); + } else { + scale_factors(i) = 1.0; + } + if (i == 0 || scale_factors(i) < min_scale) { + min_scale = scale_factors(i); + component_name_with_min_scale = delta_nnet_->GetComponentName(c); + max_change_with_min_scale = max_param_change_per_comp; + } + param_delta_squared += std::pow(scale_factors(i), 2.0) * dot_prod; + i++; + } + } + KALDI_ASSERT(i == scale_factors.Dim()); + BaseFloat param_delta = std::sqrt(param_delta_squared); + // computes the scale for global max-change (with momentum) + BaseFloat scale = (1.0 - config_.momentum); + if (config_.max_param_change != 0.0) { + param_delta *= scale; + if (param_delta > config_.max_param_change) { + if (param_delta - param_delta != 0.0) { + KALDI_WARN << "Infinite parameter change, will not apply."; + SetZero(false, delta_nnet_); + } else { + scale *= config_.max_param_change / param_delta; + num_max_change_global_applied_++; + } + } + } + if ((config_.max_param_change != 0.0 && + param_delta > config_.max_param_change && + param_delta 
- param_delta == 0.0) || min_scale < 1.0) { + std::ostringstream ostr; + if (min_scale < 1.0) + ostr << "Per-component max-change active on " + << num_max_change_per_component_applied_per_minibatch + << " / " << num_updatable << " Updatable Components. " + << "(smallest factor=" << min_scale << " on " + << component_name_with_min_scale + << " with max-change=" << max_change_with_min_scale <<"). "; + if (param_delta > config_.max_param_change) + ostr << "Global max-change factor was " + << config_.max_param_change / param_delta + << " with max-change=" << config_.max_param_change << "."; + KALDI_LOG << ostr.str(); + } + // applies both of the max-change scalings all at once, component by component + // and updates parameters + scale_factors.Scale(scale); + AddNnetComponents(*delta_nnet_, scale_factors, scale, nnet_); + ScaleNnet(config_.momentum, delta_nnet_); +} + bool NnetTrainer::PrintTotalStats() const { unordered_map<std::string, ObjectiveFunctionInfo, StringHasher>::const_iterator iter = objf_info_.begin(), @@ -128,9 +190,34 @@ bool NnetTrainer::PrintTotalStats() const { const ObjectiveFunctionInfo &info = iter->second; ans = ans || info.PrintTotalStats(name); } + PrintMaxChangeStats(); return ans; } +void NnetTrainer::PrintMaxChangeStats() const { + KALDI_ASSERT(delta_nnet_ != NULL); + int32 i = 0; + for (int32 c = 0; c < delta_nnet_->NumComponents(); c++) { + Component *comp = delta_nnet_->GetComponent(c); + if (comp->Properties() & kUpdatableComponent) { + UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(comp); + if (uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + << "UpdatableComponent; change this code."; + if (num_max_change_per_component_applied_[i] > 0) + KALDI_LOG << "For " << delta_nnet_->GetComponentName(c) + << ", per-component max-change was enforced " + << (100.0 * num_max_change_per_component_applied_[i]) / + num_minibatches_processed_ << " \% of the time."; + i++; + } + } + if (num_max_change_global_applied_ > 0) + KALDI_LOG << "The global max-change was enforced " + << (100.0 * num_max_change_global_applied_) / + num_minibatches_processed_ << " \% of the time."; +} + void ObjectiveFunctionInfo::UpdateStats( const std::string &output_name, int32 minibatches_per_phase, diff --git a/src/nnet3/nnet-training.h b/src/nnet3/nnet-training.h index 2900edc1c13..70c90267c66 100644 --- a/src/nnet3/nnet-training.h +++ b/src/nnet3/nnet-training.h @@ -151,11 +151,20 @@ class NnetTrainer { // Prints out the final stats, and return true if there was a nonzero count. bool PrintTotalStats() const; + // Prints out the max-change stats (if nonzero): the percentage of time that + // per-component max-change and global max-change were enforced. + void PrintMaxChangeStats() const; + ~NnetTrainer(); private: void ProcessOutputs(const NnetExample &eg, NnetComputer *computer); + // Applies per-component max-change and global max-change to all updatable + // components in *delta_nnet_, and uses *delta_nnet_ to update parameters + // in *nnet_. + void UpdateParamsWithMaxChange(); + const NnetTrainerOptions config_; Nnet *nnet_; Nnet *delta_nnet_; // Only used if momentum != 0.0 or max-param-change != @@ -170,6 +179,10 @@ class NnetTrainer { // So we store the objective functions per output layer. int32 num_minibatches_processed_; + // stats for max-change. 
+ std::vector<int32> num_max_change_per_component_applied_; + int32 num_max_change_global_applied_; + unordered_map<std::string, ObjectiveFunctionInfo, StringHasher> objf_info_; }; diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index 955e200d072..d65193d9a54 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -351,6 +351,33 @@ void ScaleNnet(BaseFloat scale, Nnet *nnet) { } } +void AddNnetComponents(const Nnet &src, const Vector<BaseFloat> &alphas, + BaseFloat scale, Nnet *dest) { + if (src.NumComponents() != dest->NumComponents()) + KALDI_ERR << "Trying to add incompatible nnets."; + int32 i = 0; + for (int32 c = 0; c < src.NumComponents(); c++) { + const Component *src_comp = src.GetComponent(c); + Component *dest_comp = dest->GetComponent(c); + if (src_comp->Properties() & kUpdatableComponent) { + // For now all updatable components inherit from class UpdatableComponent. + // If that changes in future, we will change this code. + const UpdatableComponent *src_uc = + dynamic_cast<const UpdatableComponent*>(src_comp); + UpdatableComponent *dest_uc = + dynamic_cast<UpdatableComponent*>(dest_comp); + if (src_uc == NULL || dest_uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + "UpdatableComponent; change this code."; + KALDI_ASSERT(i < alphas.Dim()); + dest_uc->Add(alphas(i++), *src_uc); + } else { // add stored stats + dest_comp->Add(scale, *src_comp); + } + } + KALDI_ASSERT(i == alphas.Dim()); +} + void AddNnet(const Nnet &src, BaseFloat alpha, Nnet *dest) { if (src.NumComponents() != dest->NumComponents()) KALDI_ERR << "Trying to add incompatible nnets."; diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 9606bd5d5b7..1e0dcefd703 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -145,6 +145,12 @@ void ScaleNnetComponents(const Vector<BaseFloat> &scales, /// stored stats). void AddNnet(const Nnet &src, BaseFloat alpha, Nnet *dest); +/// Does *dest += alpha * src for updatable components (affects nnet parameters), +/// and *dest += scale * src for other components (affects stored stats). +/// Here, alphas is a vector of size equal to the number of updatable components. +void AddNnetComponents(const Nnet &src, const Vector<BaseFloat> &alphas, + BaseFloat scale, Nnet *dest); + /// Returns the total of the number of parameters in the updatable components of /// the nnet. int32 NumParameters(const Nnet &src);
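
Editor's note (illustrative, not part of the patch): UpdateParamsWithMaxChange() above applies two limits before adding *delta_nnet_ to the model. First, each updatable component's parameter delta is scaled so that its 2-norm does not exceed that component's max-change (a value of 0 disables the check). Second, the combined delta, after the (1 - momentum) factor, is scaled again if its overall 2-norm exceeds --max-param-change. The sketch below mirrors only that scaling arithmetic in plain Python/NumPy; the function name combine_max_change and its arguments are hypothetical and do not correspond to any Kaldi API, and the infinity check and the enforcement statistics are omitted.

# Illustrative sketch (hypothetical names, not part of the patch or of Kaldi):
# mirrors the per-component + global max-change scaling of UpdateParamsWithMaxChange().
import numpy as np

def combine_max_change(deltas, per_comp_max_change, global_max_change, momentum=0.0):
    # deltas: list of per-component parameter-delta vectors
    # per_comp_max_change: per-component limits; 0.0 disables the per-component check
    # returns the effective scale factor applied to each component's delta
    scale_factors = []
    param_delta_squared = 0.0
    for delta, max_change in zip(deltas, per_comp_max_change):
        norm = np.sqrt(np.dot(delta, delta))
        # per-component limit: shrink this component's delta so its norm <= max_change
        factor = max_change / norm if (max_change != 0.0 and norm > max_change) else 1.0
        scale_factors.append(factor)
        param_delta_squared += (factor * norm) ** 2
    # global limit, applied on top of the momentum factor (1 - momentum)
    scale = 1.0 - momentum
    param_delta = np.sqrt(param_delta_squared) * scale
    if global_max_change != 0.0 and param_delta > global_max_change:
        scale *= global_max_change / param_delta
    # the update actually added to the model for component i is
    # (scale * scale_factors[i]) * deltas[i], cf. AddNnetComponents()
    return [scale * f for f in scale_factors]

# Example: component 2 (delta norm 2.0) is halved by its max-change=1.0; the combined
# delta (norm ~1.12) then exceeds the global max-param-change=1.0, so both components
# are scaled by a further ~0.89.
print(combine_max_change([np.array([0.3, 0.4]), np.array([1.2, 1.6])],
                         per_comp_max_change=[0.75, 1.0], global_max_change=1.0))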