Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions egs/cifar/v1/local/nnet3/compare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash

# This script compares trained nnet3 models between systems: it prints a
# table with one column per experiment directory and one row per metric
# (final test/train accuracy, final test/train objective, num-parameters),
# each extracted from the training logs under <dir>/log/.
# e.g. local/nnet3/compare.sh exp/resnet1{b,c}_cifar10


if [ $# -eq 0 ]; then
  echo "Usage: $0: <dir1> [<dir2> ... ]"
  echo "e.g.: $0 exp/resnet1{b,c}_cifar10"
  exit 1
fi

echo "# $0 $*"

# All positional arguments are experiment directories; keep them in an
# array so names containing spaces survive intact.
dirs=("$@")

# print_row <label> <log-basename> <grep-pattern> <awk-field>
# Prints one table row: the label, then for each experiment directory the
# requested awk field of the line(s) matching <grep-pattern> in
# <dir>/log/<log-basename>, right-padded to 12 columns.
print_row() {
  local label=$1 logname=$2 pattern=$3 field=$4
  local x val
  echo -n "$label"
  for x in "${dirs[@]}"; do
    val=$(grep "$pattern" "$x/log/$logname" | awk -v f="$field" '{print $f}')
    printf "% 12s" "$val"
  done
  echo
}

# Header row: one column per system (basename of the experiment dir).
echo -n "# System "
for x in "${dirs[@]}"; do printf "% 12s" " $(basename "$x")"; done
echo

print_row "# final test accuracy: "  compute_prob_valid.final.log acc            8
print_row "# final train accuracy: " compute_prob_train.final.log acc            8
print_row "# final test objf: "      compute_prob_valid.final.log log-like       8
print_row "# final train objf: "     compute_prob_train.final.log log-like       8
print_row "# num-parameters: "       progress.1.log               num-parameters 2
16 changes: 10 additions & 6 deletions egs/cifar/v1/local/nnet3/run_cnn_aug_1b.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#!/bin/bash

# aug_1b is the same as 1e but with data augmentation
# accuracy 84.5% (1e has accuracy 83%)
# run_cnn_aug_1b is the same as run_cnn_1e but with data augmentation.

# accuracy is 0.857, vs. 0.83 for the un-augmented baseline.

# exp/cnn_aug_1b_cifar10: num-iters=60 nj=1..2 num-params=2.2M dim=96->10 combine=-0.40->-0.38 loglike:train/valid[39,59,final]=(-0.35,-0.26,-0.26/-0.47,-0.42,-0.42) accuracy:train/valid[39,59,final]=(0.88,0.91,0.91/0.84,0.86,0.86)

# grep Overall exp/cnn_aug_1b_cifar10/log/compute_prob_valid.final.log | grep acc
# LOG (nnet3-compute-prob[5.1]:PrintTotalStats():nnet-diagnostics.cc:165) Overall accuracy for 'output' is 0.8567 per frame, over 10000 frames.

# steps/info/nnet3_dir_info.pl exp/cnn_aug_1b_cifar10
# exp/cnn_aug_1b_cifar10/: num-iters=60 nj=1..2 num-params=0.2M dim=96->10 combine=-0.53->-0.50 loglike:train/valid[39,59,final]=(-0.57,-0.45,-0.48/-0.68,-0.62,-0.64) accuracy:train/valid[39,59,final]=(0.80,0.84,0.83/0.76,0.79,0.78)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
Expand All @@ -17,7 +21,7 @@ train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=_aug_1e
affix=_aug_1b


# End configuration section.
Expand Down Expand Up @@ -93,7 +97,7 @@ if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=30 \
Expand Down
2 changes: 1 addition & 1 deletion egs/cifar/v1/local/nnet3/run_cnn_aug_1c.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# accuracy improved from 85.8% to 88%

# steps/info/nnet3_dir_info.pl exp/cnn_aug_1c_cifar10/
# exp/cnn_aug_1c_cifar10/: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.24->-0.24 loglike:train/valid[132,199,final]=(-0.18,-0.12,-0.12/-0.39,-0.37,-0.37) accuracy:train/valid[132,199,final]=(0.94,0.96,0.96/0.87,0.88,0.88)
# exp/cnn_aug_1c_cifar10: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.23->-0.24 loglike:train/valid[132,199,final]=(-0.17,-0.12,-0.12/-0.39,-0.36,-0.37) accuracy:train/valid[132,199,final]=(0.94,0.96,0.96/0.87,0.88,0.88)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
Expand Down
144 changes: 144 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1a.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/bin/bash

# run_resnet_1a.sh is a quite well-performing resnet.
# It includes a form of shrinkage that approximates l2 regularization.
# (c.f. --proportional-shrink).

# Definitely better:

# local/nnet3/compare.sh exp/resnet1a_cifar10
# System resnet1a_cifar10
# final test accuracy: 0.9481
# final train accuracy: 0.9992
# final test objf: -0.171369
# final train objf: -0.00980603
# num-parameters: 1322730

# local/nnet3/compare.sh exp/resnet1a_cifar100
# System resnet1a_cifar100
# final test accuracy: 0.7478
# final train accuracy: 0.9446
# final test objf: -0.899789
# final train objf: -0.22468
# num-parameters: 1345860



# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
# NOTE(review): these defaults are overridden from the command line
# (e.g. --stage 2 --dataset cifar100) via utils/parse_options.sh below —
# standard Kaldi convention; confirm against that script.
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=1a


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

# Refuse to run without a CUDA build: training below requests --use-gpu=true.
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



# Output directory for this experiment, e.g. exp/resnet1a_cifar10.
dir=exp/resnet${affix}_${dataset}

# Pre-generated training examples; must be created beforehand (see run.sh).
egs=exp/${dataset}_egs2

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


# Stage 1: generate the network config from an xconfig description.
if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

# Number of output classes (10 for cifar10, 100 for cifar100), read from egs.
num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x3x color
# images.


# Filter counts per resolution stage; nb3 is the bottleneck width used in
# the final res-blocks.
nf1=48
nf2=96
nf3=256
nb3=128

common="required-time-offsets=0 height-offsets=-1,0,1"
res_opts="bypass-source=batchnorm"

# Network: input dim=96 is height 32 x 3 color channels (see note above).
# Three resolution stages (height 32 -> 16 -> 8), each a conv layer followed
# by res-blocks; then channel averaging and the output layer.
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


# Stage 2: train the network. Augmentation does random horizontal flips and
# small shifts; --proportional-shrink=50.0 gives the l2-like shrinkage
# mentioned at the top of the file.
if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=60 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=50.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
143 changes: 143 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1b.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/bin/bash

# 1b is as 1a but using more epochs: 100 instead of 60.
# This helps a bit.

#exp/resnet1b_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.01->-0.01 loglike:train/valid[87,132,final]=(-0.13,-0.03,-0.01/-0.27,-0.21,-0.16) accuracy:train/valid[87,132,final]=(0.95,0.99,1.00/0.91,0.94,0.95)
#exp/resnet1b_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.22->-0.19 loglike:train/valid[87,132,final]=(-0.75,-0.27,-0.16/-1.22,-1.06,-0.89) accuracy:train/valid[87,132,final]=(0.78,0.93,0.96/0.67,0.72,0.76)


# local/nnet3/compare.sh exp/resnet1a_cifar10 exp/resnet1b_cifar10
# System resnet1a_cifar10 resnet1b_cifar10
# final test accuracy: 0.9481 0.9521
# final train accuracy: 0.9992 0.9998
# final test objf: -0.171369 -0.160283
# final train objf: -0.00980603 -0.00672504
# num-parameters: 1322730 1322730

# local/nnet3/compare.sh exp/resnet1a_cifar100 exp/resnet1b_cifar100
# System resnet1a_cifar100 resnet1b_cifar100
# final test accuracy: 0.7478 0.7597
# final train accuracy: 0.9446 0.9638
# final test objf: -0.899789 -0.889707
# final train objf: -0.22468 -0.163996
# num-parameters: 1345860 1345860

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
# NOTE(review): these defaults are overridden from the command line
# (e.g. --stage 2 --dataset cifar100) via utils/parse_options.sh below —
# standard Kaldi convention; confirm against that script.
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=1b


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

# Refuse to run without a CUDA build: training below requests --use-gpu=true.
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



# Output directory for this experiment, e.g. exp/resnet1b_cifar10.
dir=exp/resnet${affix}_${dataset}

# Pre-generated training examples; must be created beforehand (see run.sh).
egs=exp/${dataset}_egs2

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


# Stage 1: generate the network config from an xconfig description.
# The network is identical to run_resnet_1a.sh; only the number of training
# epochs differs (see stage 2).
if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

# Number of output classes (10 for cifar10, 100 for cifar100), read from egs.
num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x3x color
# images.


# Filter counts per resolution stage; nb3 is the bottleneck width used in
# the final res-blocks.
nf1=48
nf2=96
nf3=256
nb3=128

common="required-time-offsets=0 height-offsets=-1,0,1"
res_opts="bypass-source=batchnorm"

# Network: input dim=96 is height 32 x 3 color channels (see note above).
# Three resolution stages (height 32 -> 16 -> 8), each a conv layer followed
# by res-blocks; then channel averaging and the output layer.
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


# Stage 2: train the network. Same options as run_resnet_1a.sh except
# --trainer.num-epochs=100 (vs. 60), which is the only change in 1b.
if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=100 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=50.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
Loading