diff --git a/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh b/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh new file mode 100755 index 00000000000..1eb448149ba --- /dev/null +++ b/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh @@ -0,0 +1,134 @@ +#!/bin/bash + + +# 1d is as 1c but setting num-minibatches-history=40. +# A bit better. + +# local/nnet3/compare.sh exp/cnn_aug_1c_cifar10 exp/cnn_aug_1d_cifar10 +# System cnn_aug_1c_cifar10 cnn_aug_1d_cifar10 +# final test accuracy: 0.8834 0.8857 +# final train accuracy: 0.9644 0.9626 +# final test objf: -0.362241 -0.356861 +# final train objf: -0.114712 -0.114144 +# num-parameters: 2205290 2205290 + +# local/nnet3/compare.sh exp/cnn_aug_1c_cifar100 exp/cnn_aug_1d_cifar100 +# System cnn_aug_1c_cifar100 cnn_aug_1d_cifar100 +# final test accuracy: 0.6219 0.6237 +# final train accuracy: 0.8634 0.8688 +# final test objf: -1.42399 -1.40784 +# final train objf: -0.493349 -0.482047 +# num-parameters: 2251460 2251460 + + +# steps/info/nnet3_dir_info.pl exp/cnn_aug_1d_cifar10{,0} +# exp/cnn_aug_1d_cifar10: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.24->-0.23 loglike:train/valid[132,199,final]=(-0.172,-0.114,-0.114/-0.38,-0.36,-0.36) accuracy:train/valid[132,199,final]=(0.938,0.963,0.963/0.879,0.887,0.886) +# exp/cnn_aug_1d_cifar100: num-iters=200 nj=1..2 num-params=2.3M dim=96->100 combine=-0.90->-0.92 loglike:train/valid[132,199,final]=(-0.63,-0.48,-0.48/-1.43,-1.41,-1.41) accuracy:train/valid[132,199,final]=(0.821,0.868,0.869/0.61,0.62,0.62) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +dataset=cifar10 +srand=0 +reporting_email= +affix=_aug_1d + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2 + relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512 + output-layer name=output dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=100 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/cifar/v1/local/nnet3/run_resnet_1c.sh b/egs/cifar/v1/local/nnet3/run_resnet_1c.sh new file mode 100755 index 00000000000..0708b3d6eaa --- /dev/null +++ b/egs/cifar/v1/local/nnet3/run_resnet_1c.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# 1c is as 1b but setting num-minibatches-history=40.0 in the configs, +# so the Fisher matrix estimates change less fast. +# Seems to be helpful. + +# local/nnet3/compare.sh exp/resnet1b_cifar10 exp/resnet1c_cifar10 +# System resnet1b_cifar10 resnet1c_cifar10 +# final test accuracy: 0.9481 0.9514 +# final train accuracy: 0.9996 1 +# final test objf: -0.163336 -0.157244 +# final train objf: -0.00788341 -0.00751868 +# num-parameters: 1322730 1322730 + +# local/nnet3/compare.sh exp/resnet1b_cifar100 exp/resnet1c_cifar100 +# System resnet1b_cifar100 resnet1c_cifar100 +# final test accuracy: 0.7602 0.7627 +# final train accuracy: 0.9598 0.96 +# final test objf: -0.888699 -0.862205 +# final train objf: -0.164213 -0.174973 +# num-parameters: 1345860 1345860 +# steps/info/nnet3_dir_info.pl exp/resnet1c_cifar10{,0} +# exp/resnet1c_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.02->-0.01 loglike:train/valid[87,132,final]=(-0.115,-0.034,-0.0075/-0.24,-0.21,-0.157) accuracy:train/valid[87,132,final]=(0.960,0.9888,1.0000/0.925,0.938,0.951) +# exp/resnet1c_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.24->-0.20 loglike:train/valid[87,132,final]=(-0.75,-0.27,-0.175/-1.20,-1.00,-0.86) accuracy:train/valid[87,132,final]=(0.78,0.923,0.960/0.67,0.73,0.76) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +dataset=cifar10 +srand=0 +reporting_email= +affix=1c + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1 + res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts + res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts + conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2 + res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts + res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts + conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3 + res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3 + output-layer name=output learning-rate-factor=0.1 dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=100 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.optimization.proportional-shrink=50.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh b/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh index 96e7254474a..e89ff125102 100755 --- a/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh +++ b/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh @@ -2,8 +2,9 @@ # nnet topology similar to 1a but bigger and with more epochs and data augmentation (improved 95 --> 97) -# steps/info/nnet3_dir_info.pl exp/cnn_aug1a: -# exp/cnn_aug1a: num-iters=300 nj=1..2 num-params=2.8M dim=96->10 combine=-0.02->-0.02 loglike:train/valid[199,299,final]=(-0.01,-0.00,-0.00/-0.17,-0.17,-0.17) accuracy:train/valid[199,299,final]=(1.00,1.00,1.00/0.97,0.97,0.97) + +# steps/info/nnet3_dir_info.pl exp/cnn_aug1a +# exp/cnn_aug1a: num-iters=130 nj=2..4 num-params=2.8M dim=96->10 combine=-0.07->-0.06 loglike:train/valid[85,129,final]=(-0.090,-0.060,-0.054/-0.163,-0.110,-0.102) accuracy:train/valid[85,129,final]=(0.9764,0.9868,0.9886/0.958,0.9731,0.9762) # Set -e here so that we catch if any executable fails immediately set -euo pipefail diff --git a/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh b/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh new file mode 100755 index 00000000000..cf2f92590d2 --- /dev/null +++ b/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# run_cnn_aug_1b.sh is like run_cnn_aug_1a.sh but setting +# num-minibatches-history=40.0 (longer history for 
natural gradient), +# and using the "egs2" examples with more archives, which necessitates +# adjusting the proportional-shrink option (since it should be +# proportional to archive size). + +# result improves 97.62 -> 97.71. + +# steps/info/nnet3_dir_info.pl exp/cnn_aug1b +# exp/cnn_aug1b: num-iters=180 nj=2..4 num-params=2.8M dim=96->10 combine=-0.06->-0.06 loglike:train/valid[119,179,final]=(-0.066,-0.051,-0.049/-0.126,-0.103,-0.100) accuracy:train/valid[119,179,final]=(0.9846,0.9890,0.9900/0.970,0.9760,0.9771) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +srand=0 +reporting_email= +affix=_aug1b + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2 + relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512 + output-layer name=output dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$cmd" \ + --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=30 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.optimization.proportional-shrink=18.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/local/nnet3/run_resnet_1b.sh b/egs/svhn/v1/local/nnet3/run_resnet_1b.sh index 7e6ab60eae3..7f0540e90fe 100755 --- a/egs/svhn/v1/local/nnet3/run_resnet_1b.sh +++ b/egs/svhn/v1/local/nnet3/run_resnet_1b.sh @@ -1,6 +1,6 @@ #!/bin/bash -# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.055,-0.041,-0.035/-0.097,-0.079,-0.074) accuracy:train/valid[85,129,final]=(0.9882,0.9924,0.9946/0.977,0.9817,0.9840) +# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.049,-0.044,-0.036/-0.098,-0.085,-0.076) accuracy:train/valid[85,129,final]=(0.9904,0.9908,0.9940/0.9764,0.9804,0.9831) # This setup is based on the one in cifar/v1/local/nnet3/run_resnet_1{a,b}.sh. 
# We are reducing the number of epochs quite a bit, since there is so much @@ -107,7 +107,7 @@ fi if [ $stage -le 2 ]; then steps/nnet3/train_raw_dnn.py --stage=$train_stage \ - --cmd="$train_cmd" \ + --cmd="$cmd" \ --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ diff --git a/egs/svhn/v1/local/nnet3/run_resnet_1c.sh b/egs/svhn/v1/local/nnet3/run_resnet_1c.sh new file mode 100755 index 00000000000..b56ee62b806 --- /dev/null +++ b/egs/svhn/v1/local/nnet3/run_resnet_1c.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +# resnet1c is as resnet1b but adding "num-minibatches-history=40.0" to +# all layers to increase the history size of natural gradient +# (improves optimization), and using the "egs2" egs with more, +# smaller archives. Also changing the proportional-shrink option +# to compensate for the change in archive size (it should vary +# proportionally to the number of egs in the archive). + +# improves 98.31 -> 98.45. + +# exp/resnet1c: num-iters=180 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.03 loglike:train/valid[119,179,final]=(-0.047,-0.041,-0.034/-0.083,-0.075,-0.071) accuracy:train/valid[119,179,final]=(0.9914,0.9922,0.9944/0.9803,0.9826,0.9845) + + + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +srand=0 +reporting_email= +affix=1c + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1 + res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts + res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts + conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2 + res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts + res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts + conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3 + res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3 + output-layer name=output learning-rate-factor=0.1 dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$cmd" \ + --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=30 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + 
--trainer.optimization.proportional-shrink=18.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/run.sh b/egs/svhn/v1/run.sh index fc2e2ef7733..720f4a13e29 100755 --- a/egs/svhn/v1/run.sh +++ b/egs/svhn/v1/run.sh @@ -19,3 +19,9 @@ if [ $stage -le 1 ]; then # egs preparation image/nnet3/get_egs.sh --egs-per-archive 50000 --cmd "$cmd" data/train_all data/test exp/egs fi + +if [ $stage -le 2 ]; then + # Making a version of the egs that have more archives with fewer egs each (this seems to + # slightly improve results). Eventually we'll disable the creation of the egs above. + image/nnet3/get_egs.sh --egs-per-archive 35000 --cmd "$cmd" data/train_all data/test exp/egs2 +fi diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh index 08eeba59c3d..4c578c20ad1 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh @@ -3,8 +3,8 @@ # run_tdnn_1e.sh is like run_tdnn_1d.sh but batchnorm components instead of renorm -exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.098->-0.097 xent:train/valid[167,252,final]=(-1.40,-1.34,-1.34/-1.50,-1.46,-1.46) logprob:train/valid[167,252,final]=(-0.091,-0.083,-0.083/-0.104,-0.101,-0.101) -exp/chain_cleaned/tdnn1e_sp_bi/: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.095->-0.095 xent:train/valid[167,252,final]=(-1.37,-1.31,-1.31/-1.47,-1.44,-1.44) logprob:train/valid[167,252,final]=(-0.087,-0.078,-0.078/-0.102,-0.099,-0.099) +# exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.098->-0.097 xent:train/valid[167,252,final]=(-1.40,-1.34,-1.34/-1.50,-1.46,-1.46) logprob:train/valid[167,252,final]=(-0.091,-0.083,-0.083/-0.104,-0.101,-0.101) +# exp/chain_cleaned/tdnn1e_sp_bi/: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.095->-0.095 xent:train/valid[167,252,final]=(-1.37,-1.31,-1.31/-1.47,-1.44,-1.44) logprob:train/valid[167,252,final]=(-0.087,-0.078,-0.078/-0.102,-0.099,-0.099) # local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1d_sp_bi exp/chain_cleaned/tdnn1e_sp_bi # System tdnn1d_sp_bi tdnn1e_sp_bi @@ -49,7 +49,7 @@ nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned # are just hardcoded at this level, in the commands below. train_stage=-10 tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. -tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1e #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. common_egs_dir= # you can set this to use previously dumped egs. # End configuration section. 
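The proportional-shrink changes mentioned in run_cnn_aug_1b.sh and run_resnet_1c.sh above follow from the egs/svhn/v1/run.sh hunk: the egs2 archives hold 35000 egs each instead of 50000, and the script comments say the option should vary proportionally to the number of egs per archive, so a script switching to egs2 should scale it by 35000/50000 = 0.7. A minimal shell sketch of that scaling; the old_shrink value below is illustrative only and is not taken from any script in this patch:

  old_egs_per_archive=50000   # egs per archive in exp/egs
  new_egs_per_archive=35000   # egs per archive in exp/egs2
  old_shrink=26.0             # hypothetical proportional-shrink from a 1a/1b-style script
  # scale proportional-shrink by the ratio of archive sizes:
  new_shrink=$(awk "BEGIN{print $old_shrink * $new_egs_per_archive / $old_egs_per_archive}")
  echo $new_shrink            # prints 18.2, close to the 18.0 used by run_resnet_1c.sh above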
diff --git a/egs/wsj/s5/steps/info/nnet3_dir_info.pl b/egs/wsj/s5/steps/info/nnet3_dir_info.pl index ad4a86e4afd..06d07a63755 100755 --- a/egs/wsj/s5/steps/info/nnet3_dir_info.pl +++ b/egs/wsj/s5/steps/info/nnet3_dir_info.pl @@ -146,9 +146,9 @@ sub get_combine_info { sub number_to_string { my ($value, $name) = @_; my $precision; - if (abs($value) < 0.02 or ($name eq "accuracy" and abs($value) > 0.98)) { + if (abs($value) < 0.02 or ($name eq "accuracy" and abs($value) > 0.97)) { $precision = 4; - } elsif (abs($value) < 0.2 or ($name eq "accuracy" and abs($value) > 0.8)) { + } elsif (abs($value) < 0.2 or ($name eq "accuracy" and abs($value) > 0.7)) { $precision = 3; } else { $precision = 2; diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index da19b477337..f1e47b2794b 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -2670,8 +2670,8 @@ std::string NaturalGradientAffineComponent::Info() const { PrintParameterStats(stream, "bias", bias_params_, true); stream << ", rank-in=" << rank_in_ << ", rank-out=" << rank_out_ - << ", num_samples_history=" << num_samples_history_ - << ", update_period=" << update_period_ + << ", num-samples-history=" << num_samples_history_ + << ", update-period=" << update_period_ + << ", alpha=" << alpha_; return stream.str(); } @@ -5375,7 +5375,8 @@ std::string BatchNormComponent::Info() const { std::ostringstream stream; stream << Type() << ", dim=" << dim_ << ", block-dim=" << block_dim_ << ", epsilon=" << epsilon_ << ", target-rms=" << target_rms_ - << ", count=" << count_; + << ", count=" << count_ + << ", test-mode=" << (test_mode_ ? "true" : "false"); if (count_ > 0) { Vector<double> mean(stats_sum_), var(stats_sumsq_); mean.Scale(1.0 / count_);
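The number_to_string change in nnet3_dir_info.pl above is what lets the refreshed result lines earlier in this patch (for example the 0.9762 and 0.9771 validation accuracies in the SVHN scripts) be printed with four decimal places: four places were previously reserved for accuracies above 0.98, and the cutoff is now 0.97 (the three-place cutoff likewise drops from 0.8 to 0.7). A small illustration of the effect, assuming the rest of the sub (outside the hunk shown) formats the value at the chosen precision:

  value=0.9762
  printf '%.3f\n' $value   # old rule (accuracy <= 0.98): prints 0.976
  printf '%.4f\n' $value   # new rule (accuracy > 0.97):  prints 0.9762
  # the extra digit matters because these systems often differ only in the third or fourth decimal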