diff --git a/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh b/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh new file mode 100755 index 00000000000..1eb448149ba --- /dev/null +++ b/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh @@ -0,0 +1,134 @@ +#!/bin/bash + + +# 1d is as 1c but setting num-minibatches-history=40. +# A bit better. + +# local/nnet3/compare.sh exp/cnn_aug_1c_cifar10 exp/cnn_aug_1d_cifar10 +# System cnn_aug_1c_cifar10 cnn_aug_1d_cifar10 +# final test accuracy: 0.8834 0.8857 +# final train accuracy: 0.9644 0.9626 +# final test objf: -0.362241 -0.356861 +# final train objf: -0.114712 -0.114144 +# num-parameters: 2205290 2205290 + +# local/nnet3/compare.sh exp/cnn_aug_1c_cifar100 exp/cnn_aug_1d_cifar100 +# System cnn_aug_1c_cifar100 cnn_aug_1d_cifar100 +# final test accuracy: 0.6219 0.6237 +# final train accuracy: 0.8634 0.8688 +# final test objf: -1.42399 -1.40784 +# final train objf: -0.493349 -0.482047 +# num-parameters: 2251460 2251460 + + +# steps/info/nnet3_dir_info.pl exp/cnn_aug_1d_cifar10{,0} +# exp/cnn_aug_1d_cifar10: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.24->-0.23 loglike:train/valid[132,199,final]=(-0.172,-0.114,-0.114/-0.38,-0.36,-0.36) accuracy:train/valid[132,199,final]=(0.938,0.963,0.963/0.879,0.887,0.886) +# exp/cnn_aug_1d_cifar100: num-iters=200 nj=1..2 num-params=2.3M dim=96->100 combine=-0.90->-0.92 loglike:train/valid[132,199,final]=(-0.63,-0.48,-0.48/-1.43,-1.41,-1.41) accuracy:train/valid[132,199,final]=(0.821,0.868,0.869/0.61,0.62,0.62) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +dataset=cifar10 +srand=0 +reporting_email= +affix=_aug_1d + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2 + relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512 + output-layer name=output dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=100 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/cifar/v1/local/nnet3/run_resnet_1c.sh b/egs/cifar/v1/local/nnet3/run_resnet_1c.sh new file mode 100755 index 00000000000..0708b3d6eaa --- /dev/null +++ b/egs/cifar/v1/local/nnet3/run_resnet_1c.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# 1c is as 1b but setting num-minibatches-history=40.0 in the configs, +# so the Fisher matrix estimates change less fast. +# Seems to be helpful. + +# local/nnet3/compare.sh exp/resnet1b_cifar10 exp/resnet1c_cifar10 +# System resnet1b_cifar10 resnet1c_cifar10 +# final test accuracy: 0.9481 0.9514 +# final train accuracy: 0.9996 1 +# final test objf: -0.163336 -0.157244 +# final train objf: -0.00788341 -0.00751868 +# num-parameters: 1322730 1322730 + +# local/nnet3/compare.sh exp/resnet1b_cifar100 exp/resnet1c_cifar100 +# System resnet1b_cifar100 resnet1c_cifar100 +# final test accuracy: 0.7602 0.7627 +# final train accuracy: 0.9598 0.96 +# final test objf: -0.888699 -0.862205 +# final train objf: -0.164213 -0.174973 +# num-parameters: 1345860 1345860 +# steps/info/nnet3_dir_info.pl exp/resnet1c_cifar10{,0} +# exp/resnet1c_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.02->-0.01 loglike:train/valid[87,132,final]=(-0.115,-0.034,-0.0075/-0.24,-0.21,-0.157) accuracy:train/valid[87,132,final]=(0.960,0.9888,1.0000/0.925,0.938,0.951) +# exp/resnet1c_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.24->-0.20 loglike:train/valid[87,132,final]=(-0.75,-0.27,-0.175/-1.20,-1.00,-0.86) accuracy:train/valid[87,132,final]=(0.78,0.923,0.960/0.67,0.73,0.76) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +dataset=cifar10 +srand=0 +reporting_email= +affix=1c + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1 + res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts + res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts + conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2 + res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts + res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts + conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3 + res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3 + output-layer name=output learning-rate-factor=0.1 dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=100 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.optimization.proportional-shrink=50.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh b/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh index 96e7254474a..e89ff125102 100755 --- a/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh +++ b/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh @@ -2,8 +2,9 @@ # nnet topology similar to 1a but bigger and with more epochs and data augmentation (improved 95 --> 97) -# steps/info/nnet3_dir_info.pl exp/cnn_aug1a: -# exp/cnn_aug1a: num-iters=300 nj=1..2 num-params=2.8M dim=96->10 combine=-0.02->-0.02 loglike:train/valid[199,299,final]=(-0.01,-0.00,-0.00/-0.17,-0.17,-0.17) accuracy:train/valid[199,299,final]=(1.00,1.00,1.00/0.97,0.97,0.97) + +# steps/info/nnet3_dir_info.pl exp/cnn_aug1a +# exp/cnn_aug1a: num-iters=130 nj=2..4 num-params=2.8M dim=96->10 combine=-0.07->-0.06 loglike:train/valid[85,129,final]=(-0.090,-0.060,-0.054/-0.163,-0.110,-0.102) accuracy:train/valid[85,129,final]=(0.9764,0.9868,0.9886/0.958,0.9731,0.9762) # Set -e here so that we catch if any executable fails immediately set -euo pipefail diff --git a/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh b/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh new file mode 100755 index 00000000000..cf2f92590d2 --- /dev/null +++ b/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# run_cnn_aug_1b.sh is like run_cnn_aug_1a.sh but setting +# num-minibatches-history=40.0 (longer history for 
natural gradient), +# and using the "egs2" examples with more archives, which necessitates +# adjusting the proportional-shrink option (since it should be +# proportional to archive size). + +# result improves 97.62 -> 97.71. + +# steps/info/nnet3_dir_info.pl exp/cnn_aug1b +# exp/cnn_aug1b: num-iters=180 nj=2..4 num-params=2.8M dim=96->10 combine=-0.06->-0.06 loglike:train/valid[119,179,final]=(-0.066,-0.051,-0.049/-0.126,-0.103,-0.100) accuracy:train/valid[119,179,final]=(0.9846,0.9890,0.9900/0.970,0.9760,0.9771) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +srand=0 +reporting_email= +affix=_aug1b + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2 + relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512 + output-layer name=output dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$cmd" \ + --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=30 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.optimization.proportional-shrink=18.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/local/nnet3/run_resnet_1b.sh b/egs/svhn/v1/local/nnet3/run_resnet_1b.sh index 7e6ab60eae3..7f0540e90fe 100755 --- a/egs/svhn/v1/local/nnet3/run_resnet_1b.sh +++ b/egs/svhn/v1/local/nnet3/run_resnet_1b.sh @@ -1,6 +1,6 @@ #!/bin/bash -# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.055,-0.041,-0.035/-0.097,-0.079,-0.074) accuracy:train/valid[85,129,final]=(0.9882,0.9924,0.9946/0.977,0.9817,0.9840) +# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.049,-0.044,-0.036/-0.098,-0.085,-0.076) accuracy:train/valid[85,129,final]=(0.9904,0.9908,0.9940/0.9764,0.9804,0.9831) # This setup is based on the one in cifar/v1/local/nnet3/run_resnet_1{a,b}.sh. 
# We are reducing the number of epochs quite a bit, since there is so much @@ -107,7 +107,7 @@ fi if [ $stage -le 2 ]; then steps/nnet3/train_raw_dnn.py --stage=$train_stage \ - --cmd="$train_cmd" \ + --cmd="$cmd" \ --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ diff --git a/egs/svhn/v1/local/nnet3/run_resnet_1c.sh b/egs/svhn/v1/local/nnet3/run_resnet_1c.sh new file mode 100755 index 00000000000..b56ee62b806 --- /dev/null +++ b/egs/svhn/v1/local/nnet3/run_resnet_1c.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +# resnet1c is as resnet1b but adding "num-minibatches-history=40.0" to +# all layers to increase the history size of natural gradient +# (improves optimization), and using the "egs2" egs with more, +# smaller archives. Also changing the proportional-shrink option +# to compensate for the change in archive size (it should vary +# proportionally to the number of egs in the archive). + +# improves 98.31 -> 98.45. + +# exp/resnet1c: num-iters=180 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.03 loglike:train/valid[119,179,final]=(-0.047,-0.041,-0.034/-0.083,-0.075,-0.071) accuracy:train/valid[119,179,final]=(0.9914,0.9922,0.9944/0.9803,0.9826,0.9845) + + + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +srand=0 +reporting_email= +affix=1c + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1 + res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts + res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts + conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2 + res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts + res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts + conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3 + res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3 + output-layer name=output learning-rate-factor=0.1 dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$cmd" \ + --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=30 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + 
--trainer.optimization.proportional-shrink=18.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/run.sh b/egs/svhn/v1/run.sh index fc2e2ef7733..720f4a13e29 100755 --- a/egs/svhn/v1/run.sh +++ b/egs/svhn/v1/run.sh @@ -19,3 +19,9 @@ if [ $stage -le 1 ]; then # egs preparation image/nnet3/get_egs.sh --egs-per-archive 50000 --cmd "$cmd" data/train_all data/test exp/egs fi + +if [ $stage -le 2 ]; then + # Making a version of the egs that have more archives with fewer egs each (this seems to + # slightly improve results). Eventually we'll disable the creation of the egs above. + image/nnet3/get_egs.sh --egs-per-archive 35000 --cmd "$cmd" data/train_all data/test exp/egs2 +fi diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh index 08eeba59c3d..4c578c20ad1 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh @@ -3,8 +3,8 @@ # run_tdnn_1e.sh is like run_tdnn_1d.sh but batchnorm components instead of renorm -exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.098->-0.097 xent:train/valid[167,252,final]=(-1.40,-1.34,-1.34/-1.50,-1.46,-1.46) logprob:train/valid[167,252,final]=(-0.091,-0.083,-0.083/-0.104,-0.101,-0.101) -exp/chain_cleaned/tdnn1e_sp_bi/: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.095->-0.095 xent:train/valid[167,252,final]=(-1.37,-1.31,-1.31/-1.47,-1.44,-1.44) logprob:train/valid[167,252,final]=(-0.087,-0.078,-0.078/-0.102,-0.099,-0.099) +# exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.098->-0.097 xent:train/valid[167,252,final]=(-1.40,-1.34,-1.34/-1.50,-1.46,-1.46) logprob:train/valid[167,252,final]=(-0.091,-0.083,-0.083/-0.104,-0.101,-0.101) +# exp/chain_cleaned/tdnn1e_sp_bi/: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.095->-0.095 xent:train/valid[167,252,final]=(-1.37,-1.31,-1.31/-1.47,-1.44,-1.44) logprob:train/valid[167,252,final]=(-0.087,-0.078,-0.078/-0.102,-0.099,-0.099) # local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1d_sp_bi exp/chain_cleaned/tdnn1e_sp_bi # System tdnn1d_sp_bi tdnn1e_sp_bi @@ -49,7 +49,7 @@ nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned # are just hardcoded at this level, in the commands below. train_stage=-10 tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. -tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1e #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. common_egs_dir= # you can set this to use previously dumped egs. # End configuration section. 
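The proportional-shrink changes mentioned in run_cnn_aug_1b.sh and run_resnet_1c.sh above follow from the egs/svhn/v1/run.sh hunk: the egs2 archives hold 35000 egs each instead of 50000, and the script comments say the option should vary proportionally to the number of egs per archive, so a script switching to egs2 should scale it by 35000/50000 = 0.7. A minimal shell sketch of that scaling; the old_shrink value below is illustrative only and is not taken from any script in this patch:

  old_egs_per_archive=50000   # egs per archive in exp/egs
  new_egs_per_archive=35000   # egs per archive in exp/egs2
  old_shrink=26.0             # hypothetical proportional-shrink from a 1a/1b-style script
  # scale proportional-shrink by the ratio of archive sizes:
  new_shrink=$(awk "BEGIN{print $old_shrink * $new_egs_per_archive / $old_egs_per_archive}")
  echo $new_shrink            # prints 18.2, close to the 18.0 used by run_resnet_1c.sh above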
diff --git a/egs/wsj/s5/steps/info/nnet3_dir_info.pl b/egs/wsj/s5/steps/info/nnet3_dir_info.pl index ad4a86e4afd..06d07a63755 100755 --- a/egs/wsj/s5/steps/info/nnet3_dir_info.pl +++ b/egs/wsj/s5/steps/info/nnet3_dir_info.pl @@ -146,9 +146,9 @@ sub get_combine_info { sub number_to_string { my ($value, $name) = @_; my $precision; - if (abs($value) < 0.02 or ($name eq "accuracy" and abs($value) > 0.98)) { + if (abs($value) < 0.02 or ($name eq "accuracy" and abs($value) > 0.97)) { $precision = 4; - } elsif (abs($value) < 0.2 or ($name eq "accuracy" and abs($value) > 0.8)) { + } elsif (abs($value) < 0.2 or ($name eq "accuracy" and abs($value) > 0.7)) { $precision = 3; } else { $precision = 2; diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index da19b477337..f1e47b2794b 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -2670,8 +2670,8 @@ std::string NaturalGradientAffineComponent::Info() const { PrintParameterStats(stream, "bias", bias_params_, true); stream << ", rank-in=" << rank_in_ << ", rank-out=" << rank_out_ - << ", num_samples_history=" << num_samples_history_ - << ", update_period=" << update_period_ + << ", num-samples-history=" << num_samples_history_ + << ", update-period=" << update_period_ + << ", alpha=" << alpha_; return stream.str(); } @@ -5375,7 +5375,8 @@ std::string BatchNormComponent::Info() const { std::ostringstream stream; stream << Type() << ", dim=" << dim_ << ", block-dim=" << block_dim_ << ", epsilon=" << epsilon_ << ", target-rms=" << target_rms_ - << ", count=" << count_; + << ", count=" << count_ + << ", test-mode=" << (test_mode_ ? "true" : "false"); if (count_ > 0) { Vector<double> mean(stats_sum_), var(stats_sumsq_); mean.Scale(1.0 / count_);
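The number_to_string change in nnet3_dir_info.pl above is what lets the refreshed result lines earlier in this patch (for example the 0.9762 and 0.9771 validation accuracies in the SVHN scripts) be printed with four decimal places: four places were previously reserved for accuracies above 0.98, and the cutoff is now 0.97 (the three-place cutoff likewise drops from 0.8 to 0.7). A small illustration of the effect, assuming the rest of the sub (outside the hunk shown) formats the value at the chosen precision:

  value=0.9762
  printf '%.3f\n' $value   # old rule (accuracy <= 0.98): prints 0.976
  printf '%.4f\n' $value   # new rule (accuracy > 0.97):  prints 0.9762
  # the extra digit matters because these systems often differ only in the third or fourth decimal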