Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions egs/cifar/v1/local/nnet3/compare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash

# This script compares trained nnet3 models between systems: it prints a
# table with one column per experiment directory and one row per metric
# (final test/train accuracy, final test/train objective, num-parameters),
# each extracted from the training logs under <dir>/log/.
# e.g. local/nnet3/compare.sh exp/resnet1{b,c}_cifar10


if [ $# -eq 0 ]; then
  echo "Usage: $0: <dir1> [<dir2> ... ]"
  echo "e.g.: $0 exp/resnet1{b,c}_cifar10"
  exit 1
fi

echo "# $0 $*"

# All positional arguments are experiment directories; keep them in an
# array so names containing spaces survive intact.
dirs=("$@")

# print_row <label> <log-basename> <grep-pattern> <awk-field>
# Prints one table row: the label, then for each experiment directory the
# requested awk field of the line(s) matching <grep-pattern> in
# <dir>/log/<log-basename>, right-padded to 12 columns.
print_row() {
  local label=$1 logname=$2 pattern=$3 field=$4
  local x val
  echo -n "$label"
  for x in "${dirs[@]}"; do
    val=$(grep "$pattern" "$x/log/$logname" | awk -v f="$field" '{print $f}')
    printf "% 12s" "$val"
  done
  echo
}

# Header row: one column per system (basename of the experiment dir).
echo -n "# System "
for x in "${dirs[@]}"; do printf "% 12s" " $(basename "$x")"; done
echo

print_row "# final test accuracy: "  compute_prob_valid.final.log acc            8
print_row "# final train accuracy: " compute_prob_train.final.log acc            8
print_row "# final test objf: "      compute_prob_valid.final.log log-like       8
print_row "# final train objf: "     compute_prob_train.final.log log-like       8
print_row "# num-parameters: "       progress.1.log               num-parameters 2
16 changes: 10 additions & 6 deletions egs/cifar/v1/local/nnet3/run_cnn_aug_1b.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#!/bin/bash

# aug_1b is the same as 1e but with data augmentation
# accuracy 84.5% (1e has accuracy 83%)
# run_cnn_aug_1b is the same as run_cnn_1e but with data augmentation.

# accuracy is 0.857, vs. 0.83 for the un-augmented baseline.

# exp/cnn_aug_1b_cifar10: num-iters=60 nj=1..2 num-params=2.2M dim=96->10 combine=-0.40->-0.38 loglike:train/valid[39,59,final]=(-0.35,-0.26,-0.26/-0.47,-0.42,-0.42) accuracy:train/valid[39,59,final]=(0.88,0.91,0.91/0.84,0.86,0.86)

# grep Overall exp/cnn_aug_1b_cifar10/log/compute_prob_valid.final.log | grep acc
# LOG (nnet3-compute-prob[5.1]:PrintTotalStats():nnet-diagnostics.cc:165) Overall accuracy for 'output' is 0.8567 per frame, over 10000 frames.

# steps/info/nnet3_dir_info.pl exp/cnn_aug_1b_cifar10
# exp/cnn_aug_1b_cifar10/: num-iters=60 nj=1..2 num-params=0.2M dim=96->10 combine=-0.53->-0.50 loglike:train/valid[39,59,final]=(-0.57,-0.45,-0.48/-0.68,-0.62,-0.64) accuracy:train/valid[39,59,final]=(0.80,0.84,0.83/0.76,0.79,0.78)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
Expand All @@ -17,7 +21,7 @@ train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=_aug_1e
affix=_aug_1b


# End configuration section.
Expand Down Expand Up @@ -93,7 +97,7 @@ if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=30 \
Expand Down
2 changes: 1 addition & 1 deletion egs/cifar/v1/local/nnet3/run_cnn_aug_1c.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# accuracy improved from 85.8% to 88%

# steps/info/nnet3_dir_info.pl exp/cnn_aug_1c_cifar10/
# exp/cnn_aug_1c_cifar10/: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.24->-0.24 loglike:train/valid[132,199,final]=(-0.18,-0.12,-0.12/-0.39,-0.37,-0.37) accuracy:train/valid[132,199,final]=(0.94,0.96,0.96/0.87,0.88,0.88)
# exp/cnn_aug_1c_cifar10: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.23->-0.24 loglike:train/valid[132,199,final]=(-0.17,-0.12,-0.12/-0.39,-0.36,-0.37) accuracy:train/valid[132,199,final]=(0.94,0.96,0.96/0.87,0.88,0.88)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
Expand Down
144 changes: 144 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1a.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/bin/bash

# run_resnet_1a.sh is a quite well-performing resnet.
# It includes a form of shrinkage that approximates l2 regularization.
# (c.f. --proportional-shrink).

# Definitely better:

# local/nnet3/compare.sh exp/resnet1a_cifar10
# System resnet1a_cifar10
# final test accuracy: 0.9481
# final train accuracy: 0.9992
# final test objf: -0.171369
# final train objf: -0.00980603
# num-parameters: 1322730

# local/nnet3/compare.sh exp/resnet1a_cifar100
# System resnet1a_cifar100
# final test accuracy: 0.7478
# final train accuracy: 0.9446
# final test objf: -0.899789
# final train objf: -0.22468
# num-parameters: 1345860



# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
# NOTE(review): these defaults are overridden from the command line
# (e.g. --stage 2 --dataset cifar100) via utils/parse_options.sh below —
# standard Kaldi convention; confirm against that script.
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=1a


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

# Refuse to run without a CUDA build: training below requests --use-gpu=true.
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



# Output directory for this experiment, e.g. exp/resnet1a_cifar10.
dir=exp/resnet${affix}_${dataset}

# Pre-generated training examples; must be created beforehand (see run.sh).
egs=exp/${dataset}_egs2

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


# Stage 1: generate the network config from an xconfig description.
if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

# Number of output classes (10 for cifar10, 100 for cifar100), read from egs.
num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x3x color
# images.


# Filter counts per resolution stage; nb3 is the bottleneck width used in
# the final res-blocks.
nf1=48
nf2=96
nf3=256
nb3=128

common="required-time-offsets=0 height-offsets=-1,0,1"
res_opts="bypass-source=batchnorm"

# Network: input dim=96 is height 32 x 3 color channels (see note above).
# Three resolution stages (height 32 -> 16 -> 8), each a conv layer followed
# by res-blocks; then channel averaging and the output layer.
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


# Stage 2: train the network. Augmentation does random horizontal flips and
# small shifts; --proportional-shrink=50.0 gives the l2-like shrinkage
# mentioned at the top of the file.
if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=60 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=50.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
143 changes: 143 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1b.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/bin/bash

# 1b is as 1a but using more epochs: 100 instead of 60.
# This helps a bit.

#exp/resnet1b_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.01->-0.01 loglike:train/valid[87,132,final]=(-0.13,-0.03,-0.01/-0.27,-0.21,-0.16) accuracy:train/valid[87,132,final]=(0.95,0.99,1.00/0.91,0.94,0.95)
#exp/resnet1b_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.22->-0.19 loglike:train/valid[87,132,final]=(-0.75,-0.27,-0.16/-1.22,-1.06,-0.89) accuracy:train/valid[87,132,final]=(0.78,0.93,0.96/0.67,0.72,0.76)


# local/nnet3/compare.sh exp/resnet1a_cifar10 exp/resnet1b_cifar10
# System resnet1a_cifar10 resnet1b_cifar10
# final test accuracy: 0.9481 0.9521
# final train accuracy: 0.9992 0.9998
# final test objf: -0.171369 -0.160283
# final train objf: -0.00980603 -0.00672504
# num-parameters: 1322730 1322730

# local/nnet3/compare.sh exp/resnet1a_cifar100 exp/resnet1b_cifar100
# System resnet1a_cifar100 resnet1b_cifar100
# final test accuracy: 0.7478 0.7597
# final train accuracy: 0.9446 0.9638
# final test objf: -0.899789 -0.889707
# final train objf: -0.22468 -0.163996
# num-parameters: 1345860 1345860

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
# NOTE(review): these defaults are overridden from the command line
# (e.g. --stage 2 --dataset cifar100) via utils/parse_options.sh below —
# standard Kaldi convention; confirm against that script.
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=1b


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

# Refuse to run without a CUDA build: training below requests --use-gpu=true.
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



# Output directory for this experiment, e.g. exp/resnet1b_cifar10.
dir=exp/resnet${affix}_${dataset}

# Pre-generated training examples; must be created beforehand (see run.sh).
egs=exp/${dataset}_egs2

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


# Stage 1: generate the network config from an xconfig description.
# The network is identical to run_resnet_1a.sh; only the number of training
# epochs differs (see stage 2).
if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

# Number of output classes (10 for cifar10, 100 for cifar100), read from egs.
num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x3x color
# images.


# Filter counts per resolution stage; nb3 is the bottleneck width used in
# the final res-blocks.
nf1=48
nf2=96
nf3=256
nb3=128

common="required-time-offsets=0 height-offsets=-1,0,1"
res_opts="bypass-source=batchnorm"

# Network: input dim=96 is height 32 x 3 color channels (see note above).
# Three resolution stages (height 32 -> 16 -> 8), each a conv layer followed
# by res-blocks; then channel averaging and the output layer.
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


# Stage 2: train the network. Same options as run_resnet_1a.sh except
# --trainer.num-epochs=100 (vs. 60), which is the only change in 1b.
if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=100 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=50.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
Loading