7 changes: 7 additions & 0 deletions egs/svhn/README.txt
@@ -0,0 +1,7 @@

This directory contains example scripts for image classification with the
SVHN (Street View House Numbers) dataset, which is available for free from
http://ufldl.stanford.edu/housenumbers/.

This demonstrates applying the nnet3 framework to image classification for
fixed-size images.
13 changes: 13 additions & 0 deletions egs/svhn/v1/cmd.sh
@@ -0,0 +1,13 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful, and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub); slurm.pl works
# with Slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export cmd="queue.pl"
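
# A sketch of the alternatives described in the comments above (assumptions
# about your setup, not part of the recipe; uncomment the one that matches):
# export cmd="run.pl"    # no queueing system: run locally, one command at a time
# export cmd="slurm.pl"  # Slurm cluster; see the conf/queue.conf notes above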
1 change: 1 addition & 0 deletions egs/svhn/v1/image
109 changes: 109 additions & 0 deletions egs/svhn/v1/local/nnet3/run_cnn_1a.sh
@@ -0,0 +1,109 @@
#!/bin/bash

# steps/info/nnet3_dir_info.pl exp/cnn1a
# exp/cnn1a: num-iters=108 nj=2..4 num-params=0.5M dim=96->10 combine=-0.09->-0.09 loglike:train/valid[71,107,final]=(-0.101,-0.074,-0.067/-0.189,-0.144,-0.136) accuracy:train/valid[71,107,final]=(0.973,0.9834,0.9850/0.949,0.963,0.966)

# Exit immediately if any command fails (-e), treat unset variables as
# errors (-u), and make pipelines fail if any stage fails (-o pipefail).
set -euo pipefail


# training options
stage=0
train_stage=-10
srand=0
reporting_email=
affix=1a


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and run configure and make on a
machine where "nvcc" is installed.
EOF
fi



dir=exp/cnn${affix}

egs=exp/egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32 color
# images (3 channels; input dim = 32 height x 3 channels = 96).

common1="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=20"
common2="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=30"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1
conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2
conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2
relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=256
output-layer name=output dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$cmd" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=25 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=4 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=25.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
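
(An illustrative usage sketch, not part of the diff: the invocation below is an
assumption; the option names follow from the variables parsed by
utils/parse_options.sh above, which maps e.g. --train-stage to $train_stage.)

local/nnet3/run_cnn_1a.sh --stage 0 --train-stage -10 --affix 1a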
113 changes: 113 additions & 0 deletions egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh
@@ -0,0 +1,113 @@
#!/bin/bash

# nnet topology similar to 1a, but bigger, with more epochs and with data
# augmentation (accuracy improved from about 95% to 97%).

# steps/info/nnet3_dir_info.pl exp/cnn_aug1a:
# exp/cnn_aug1a: num-iters=300 nj=1..2 num-params=2.8M dim=96->10 combine=-0.02->-0.02 loglike:train/valid[199,299,final]=(-0.01,-0.00,-0.00/-0.17,-0.17,-0.17) accuracy:train/valid[199,299,final]=(1.00,1.00,1.00/0.97,0.97,0.97)

# Exit immediately if any command fails (-e), treat unset variables as
# errors (-u), and make pipelines fail if any stage fails (-o pipefail).
set -euo pipefail



# training options
stage=0
train_stage=-10
srand=0
reporting_email=
affix=_aug1a


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and run configure and make on a
machine where "nvcc" is installed.
EOF
fi



dir=exp/cnn${affix}

egs=exp/egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32 color
# images (3 channels; input dim = 32 height x 3 channels = 96).

common1="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=40"
common2="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=80"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1
conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2
conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2
relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512
output-layer name=output dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$cmd" \
--image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=30 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=4 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=25.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
130 changes: 130 additions & 0 deletions egs/svhn/v1/local/nnet3/run_resnet_1b.sh
@@ -0,0 +1,130 @@
#!/bin/bash

# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.055,-0.041,-0.035/-0.097,-0.079,-0.074) accuracy:train/valid[85,129,final]=(0.9882,0.9924,0.9946/0.977,0.9817,0.9840)

# This setup is based on the one in cifar/v1/local/nnet3/run_resnet_1{a,b}.sh.
# Since there is so much more data here, we reduce the number of epochs quite
# a bit, and also reduce the proportional-shrink value.
# The augmentation options are changed as well: no horizontal flip, less
# vertical shift, and much less horizontal shift.



# Exit immediately if any command fails (-e), treat unset variables as
# errors (-u), and make pipelines fail if any stage fails (-o pipefail).
set -euo pipefail



# training options
stage=0
train_stage=-10
srand=0
reporting_email=
affix=1b


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and run configure and make on a
machine where "nvcc" is installed.
EOF
fi



dir=exp/resnet${affix}

egs=exp/egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32 color
# images (3 channels; input dim = 32 height x 3 channels = 96).


nf1=48
nf2=96
nf3=256
nb3=128

common="required-time-offsets=0 height-offsets=-1,0,1"
res_opts="bypass-source=batchnorm"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,26,30) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=30 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=4 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=25.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
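
(An illustrative sketch, not part of the diff: to compare the three trained
models, the summary tool quoted in the header comments of these scripts can be
pointed at each experiment directory; the directory names below assume the
default --affix values.)

steps/info/nnet3_dir_info.pl exp/cnn1a exp/cnn_aug1a exp/resnet1b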