7 changes: 7 additions & 0 deletions egs/svhn/README.txt
@@ -0,0 +1,7 @@

This directory contains example scripts for image classification with the
SVHN (Street View House Numbers) dataset, which is available for free from
http://ufldl.stanford.edu/housenumbers/.

This demonstrates applying the nnet3 framework to image classification for
fixed-size images.
13 changes: 13 additions & 0 deletions egs/svhn/v1/cmd.sh
@@ -0,0 +1,13 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful, and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub); slurm.pl works
# with Slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export cmd="queue.pl"
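
# A sketch of the alternatives described in the comments above (assumptions
# about your setup, not part of the recipe; uncomment the one that matches):
# export cmd="run.pl"    # no queueing system: run locally, one command at a time
# export cmd="slurm.pl"  # Slurm cluster; see the conf/queue.conf notes above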
1 change: 1 addition & 0 deletions egs/svhn/v1/image
109 changes: 109 additions & 0 deletions egs/svhn/v1/local/nnet3/run_cnn_1a.sh
@@ -0,0 +1,109 @@
#!/bin/bash

# steps/info/nnet3_dir_info.pl exp/cnn1a
# exp/cnn1a: num-iters=108 nj=2..4 num-params=0.5M dim=96->10 combine=-0.09->-0.09 loglike:train/valid[71,107,final]=(-0.101,-0.074,-0.067/-0.189,-0.144,-0.136) accuracy:train/valid[71,107,final]=(0.973,0.9834,0.9850/0.949,0.963,0.966)

# Exit immediately if any command fails (-e), treat unset variables as
# errors (-u), and make pipelines fail if any stage fails (-o pipefail).
set -euo pipefail


# training options
stage=0
train_stage=-10
srand=0
reporting_email=
affix=1a


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and run configure and make on a
machine where "nvcc" is installed.
EOF
fi



dir=exp/cnn${affix}

egs=exp/egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32 color
# images (3 channels; input dim = 32 height x 3 channels = 96).

common1="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=20"
common2="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=30"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1
conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2
conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2
relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=256
output-layer name=output dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$cmd" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=25 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=4 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=25.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
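
(An illustrative usage sketch, not part of the diff: the invocation below is an
assumption; the option names follow from the variables parsed by
utils/parse_options.sh above, which maps e.g. --train-stage to $train_stage.)

local/nnet3/run_cnn_1a.sh --stage 0 --train-stage -10 --affix 1a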
113 changes: 113 additions & 0 deletions egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh
@@ -0,0 +1,113 @@
#!/bin/bash

# nnet topology similar to 1a, but bigger, with more epochs and with data
# augmentation (accuracy improved from about 95% to 97%).

# steps/info/nnet3_dir_info.pl exp/cnn_aug1a:
# exp/cnn_aug1a: num-iters=300 nj=1..2 num-params=2.8M dim=96->10 combine=-0.02->-0.02 loglike:train/valid[199,299,final]=(-0.01,-0.00,-0.00/-0.17,-0.17,-0.17) accuracy:train/valid[199,299,final]=(1.00,1.00,1.00/0.97,0.97,0.97)

# Exit immediately if any command fails (-e), treat unset variables as
# errors (-u), and make pipelines fail if any stage fails (-o pipefail).
set -euo pipefail



# training options
stage=0
train_stage=-10
srand=0
reporting_email=
affix=_aug1a


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and run configure and make on a
machine where "nvcc" is installed.
EOF
fi



dir=exp/cnn${affix}

egs=exp/egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32 color
# images (3 channels; input dim = 32 height x 3 channels = 96).

common1="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=40"
common2="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=80"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1
conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2
conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2
relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512
output-layer name=output dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$cmd" \
--image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=30 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=4 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=25.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
130 changes: 130 additions & 0 deletions egs/svhn/v1/local/nnet3/run_resnet_1b.sh
@@ -0,0 +1,130 @@
#!/bin/bash

# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.055,-0.041,-0.035/-0.097,-0.079,-0.074) accuracy:train/valid[85,129,final]=(0.9882,0.9924,0.9946/0.977,0.9817,0.9840)

# This setup is based on the one in cifar/v1/local/nnet3/run_resnet_1{a,b}.sh.
# Since there is so much more data here, we reduce the number of epochs quite
# a bit, and also reduce the proportional-shrink value.
# The augmentation options are changed as well: no horizontal flip, less
# vertical shift, and much less horizontal shift.



# Exit immediately if any command fails (-e), treat unset variables as
# errors (-u), and make pipelines fail if any stage fails (-o pipefail).
set -euo pipefail



# training options
stage=0
train_stage=-10
srand=0
reporting_email=
affix=1b


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and run configure and make on a
machine where "nvcc" is installed.
EOF
fi



dir=exp/resnet${affix}

egs=exp/egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32 color
# images (3 channels; input dim = 32 height x 3 channels = 96).


nf1=48
nf2=96
nf3=256
nb3=128

common="required-time-offsets=0 height-offsets=-1,0,1"
res_opts="bypass-source=batchnorm"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,26,30) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=30 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=4 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=25.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
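
(An illustrative sketch, not part of the diff: to compare the three trained
models, the summary tool quoted in the header comments of these scripts can be
pointed at each experiment directory; the directory names below assume the
default --affix values.)

steps/info/nnet3_dir_info.pl exp/cnn1a exp/cnn_aug1a exp/resnet1b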