112 changes: 112 additions & 0 deletions egs/cifar/v1/local/nnet3/run_cnn_1d.sh
@@ -0,0 +1,112 @@
#!/bin/bash

# 1d is as 1c but adding batch-norm to all convolutional layers.
# batch-norm helps (0.78 -> 0.8).

# exp/cnn1d_cifar10: num-iters=60 nj=1..2 num-params=4.3M dim=96->10 combine=-0.10->-0.08 loglike:train/valid[39,59,final]=(-0.03,-0.00,-0.00/-0.63,-0.69,-0.68) accuracy:train/valid[39,59,final]=(1.00,1.00,1.00/0.81,0.82,0.82)
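# (Reading the summary line above: the three numbers in each [39,59,final]
# group are measured at iteration 39, iteration 59 and on the final combined
# model, with training-data values before the '/' and validation values after.)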


# Set -e here so that we catch it immediately if any executable fails
set -euo pipefail



# training options
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=1d


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



dir=exp/cnn${affix}_${dataset}

egs=exp/${dataset}_egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32 color
# images (3 channels, so the input dim per time step is 32 * 3 = 96).

common1="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=32"
common2="required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=64"
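# A rough gloss of the shared conv options above (not authoritative):
# height-offsets and time-offsets of -1,0,1 give a 3x3 kernel;
# required-time-offsets=0 means only the center time-offset must be present,
# so outputs are still produced at the image edges (missing offsets act like
# zero padding); num-filters-out is the number of output channels.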

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1
conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-1,0,1 $common2
conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-1,0,1 dropout-proportion=0.25 $common2 height-subsample-out=2
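# cnn4 gives height 8 x 64 filters = 512 dims per time step (image column);
# Append(0,2,...,30) takes every second column across the (assumed) 32-column
# width, so fully_connected1 sees 16 * 512 = 8192 inputs.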
relu-dropout-layer name=fully_connected1 input=Append(0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30) dropout-proportion=0.5 dim=512
output-layer name=output dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=30 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
1 change: 1 addition & 0 deletions egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py
@@ -37,6 +37,7 @@
'conv-relu-renorm-layer': xlayers.XconfigConvLayer,
'relu-conv-batchnorm-layer': xlayers.XconfigConvLayer,
'conv-relu-batchnorm-layer': xlayers.XconfigConvLayer,
'conv-relu-batchnorm-dropout-layer': xlayers.XconfigConvLayer,
'conv-relu-dropout-layer': xlayers.XconfigConvLayer,
'relu-dropout-layer': xlayers.XconfigBasicLayer

18 changes: 11 additions & 7 deletions src/chainbin/nnet3-chain-combine.cc
@@ -41,6 +41,8 @@ int main(int argc, char *argv[]) {
" nnet3-combine den.fst 35.raw 36.raw 37.raw 38.raw ark:valid.cegs final.raw\n";

bool binary_write = true;
bool batchnorm_test_mode = false,
dropout_test_mode = true;
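// Note: batchnorm-test-mode defaults to false here, presumably because the
// combination code now recomputes the batch-norm stats itself after combining
// (see NnetChainCombiner::RecomputeBatchnormStats()), so batch-norm components
// need to stay in training mode while those stats are accumulated.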
std::string use_gpu = "yes";
NnetCombineConfig combine_config;
chain::ChainTrainingOptions chain_config;
@@ -49,6 +51,11 @@
po.Register("binary", &binary_write, "Write output in binary mode");
po.Register("use-gpu", &use_gpu,
"yes|no|optional|wait, only has effect if compiled with CUDA");
po.Register("batchnorm-test-mode", &batchnorm_test_mode,
"If true, set test-mode to true on any BatchNormComponents.");
po.Register("dropout-test-mode", &dropout_test_mode,
"If true, set test-mode to true on any DropoutComponents and "
"DropoutMaskComponents.");

combine_config.Register(&po);
chain_config.Register(&po);
@@ -77,13 +84,10 @@
Nnet nnet;
ReadKaldiObject(raw_nnet_rxfilename, &nnet);

// This is needed for batch-norm. We also ensure in the calling script
// that the freshest model comes first on the command line; this
// means we use the freshest batch-norm stats. (Since the batch-norm
// stats are not technically parameters, they are not subject to
// combination like the rest of the model parameters).
SetBatchnormTestMode(true, &nnet);
SetDropoutTestMode(true, &nnet);
if (batchnorm_test_mode)
SetBatchnormTestMode(true, &nnet);
if (dropout_test_mode)
SetDropoutTestMode(true, &nnet);

std::vector<NnetChainExample> egs;
egs.reserve(10000); // reserve a lot of space to minimize the chance of
1 change: 0 additions & 1 deletion src/chainbin/nnet3-chain-compute-prob.cc
@@ -52,7 +52,6 @@ int main(int argc, char *argv[]) {

po.Register("batchnorm-test-mode", &batchnorm_test_mode,
"If true, set test-mode to true on any BatchNormComponents.");

po.Register("dropout-test-mode", &dropout_test_mode,
"If true, set test-mode to true on any DropoutComponents and "
"DropoutMaskComponents.");
14 changes: 13 additions & 1 deletion src/nnet3/nnet-chain-combine.cc
@@ -19,6 +19,7 @@

#include "nnet3/nnet-chain-combine.h"
#include "nnet3/nnet-utils.h"
#include "nnet3/nnet-chain-training.h"

namespace kaldi {
namespace nnet3 {
@@ -38,7 +39,6 @@ NnetChainCombiner::NnetChainCombiner(const NnetCombineConfig &combine_config,
nnet_params_(std::min(num_nnets, combine_config_.max_effective_inputs),
NumParameters(first_nnet)),
tot_input_weighting_(nnet_params_.NumRows()) {
SetDropoutProportion(0, &nnet_);

if (combine_config_.sum_to_one_penalty != 0.0 &&
combine_config_.enforce_sum_to_one) {
@@ -182,6 +182,18 @@ void NnetChainCombiner::Combine() {
ComputeObjfAndDerivFromParameters(final_params, &deriv);
}
PrintParams(final_params);
if (HasBatchnorm(nnet_)) {
RecomputeBatchnormStats();
}
}

void NnetChainCombiner::RecomputeBatchnormStats() {
KALDI_LOG << "Recomputing batch-norm stats on nnet.";
NnetChainTrainingOptions train_opts;
train_opts.nnet_config.train = false;
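// With nnet_config.train == false the trainer only runs the forward pass on
// each example: no parameters are updated, but the stored stats (e.g. in any
// BatchNormComponents) are refreshed; see NnetChainTrainer::Train().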
NnetChainTrainer trainer(train_opts, den_fst_, &nnet_);
for (size_t i = 0; i < egs_.size(); i++)
trainer.Train(egs_[i]);
}


1 change: 1 addition & 0 deletions src/nnet3/nnet-chain-combine.h
@@ -194,6 +194,7 @@ class NnetChainCombiner {

void ComputeUpdatableComponentDims();
void FinishPreprocessingInput();
void RecomputeBatchnormStats();

};

24 changes: 15 additions & 9 deletions src/nnet3/nnet-chain-training.cc
@@ -58,8 +58,8 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts,


void NnetChainTrainer::Train(const NnetChainExample &chain_eg) {
bool need_model_derivative = true;
const NnetTrainerOptions &nnet_config = opts_.nnet_config;
bool need_model_derivative = nnet_config.train;
bool use_xent_regularization = (opts_.chain_config.xent_regularize != 0.0);
ComputationRequest request;
GetChainComputationRequest(*nnet_, chain_eg, need_model_derivative,
@@ -73,16 +73,21 @@ void NnetChainTrainer::Train(const NnetChainExample &chain_eg) {
// give the inputs to the computer object.
computer.AcceptInputs(*nnet_, chain_eg.inputs);
computer.Run();

this->ProcessOutputs(chain_eg, &computer);
computer.Run();

UpdateParamsWithMaxChange();
if (nnet_config.train) {
computer.Run();
UpdateParamsWithMaxChange();
} else {
// all parameter derivs will be zero; here we're just adding the stored stats.
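// (the stats appear to accumulate in delta_nnet_, the nnet the computer was
// given to update during the forward pass; AddNnet then moves them into
// nnet_ before delta_nnet_ is re-zeroed.)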
AddNnet(*delta_nnet_, 1.0, nnet_);
ScaleNnet(0.0, delta_nnet_);
}
}


void NnetChainTrainer::ProcessOutputs(const NnetChainExample &eg,
NnetComputer *computer) {
bool train = opts_.nnet_config.train;
// normally the eg will have just one output named 'output', but
// we don't assume this.
std::vector<NnetChainSupervision>::const_iterator iter = eg.outputs.begin(),
@@ -111,7 +116,7 @@ void NnetChainTrainer::ProcessOutputs(const NnetChainExample &eg,
ComputeChainObjfAndDeriv(opts_.chain_config, den_graph_,
sup.supervision, nnet_output,
&tot_objf, &tot_l2_term, &tot_weight,
&nnet_output_deriv,
(train ? &nnet_output_deriv : NULL),
(use_xent ? &xent_deriv : NULL));

if (use_xent) {
@@ -126,20 +131,21 @@ void NnetChainTrainer::ProcessOutputs(const NnetChainExample &eg,
tot_weight, xent_objf);
}

if (opts_.apply_deriv_weights && sup.deriv_weights.Dim() != 0) {
if (train && opts_.apply_deriv_weights && sup.deriv_weights.Dim() != 0) {
CuVector<BaseFloat> cu_deriv_weights(sup.deriv_weights);
nnet_output_deriv.MulRowsVec(cu_deriv_weights);
if (use_xent)
xent_deriv.MulRowsVec(cu_deriv_weights);
}

computer->AcceptInput(sup.name, &nnet_output_deriv);
if (train)
computer->AcceptInput(sup.name, &nnet_output_deriv);

objf_info_[sup.name].UpdateStats(sup.name, opts_.nnet_config.print_interval,
num_minibatches_processed_++,
tot_weight, tot_objf, tot_l2_term);

if (use_xent) {
if (train && use_xent) {
xent_deriv.Scale(opts_.chain_config.xent_regularize);
computer->AcceptInput(xent_name, &xent_deriv);
}
14 changes: 13 additions & 1 deletion src/nnet3/nnet-combine.cc
@@ -41,7 +41,6 @@ NnetCombiner::NnetCombiner(const NnetCombineConfig &config,
<< " is nonzero, so setting --enforce-sum-to-one=false.";
config_.enforce_sum_to_one = false;
}
SetDropoutProportion(0, &nnet_);
SubVector<BaseFloat> first_params(nnet_params_, 0);
VectorizeNnet(nnet_, &first_params);
tot_input_weighting_(0) += 1.0;
@@ -178,6 +177,19 @@ void NnetCombiner::Combine() {
ComputeObjfAndDerivFromParameters(final_params, &deriv);
}
PrintParams(final_params);

if (HasBatchnorm(nnet_)) {
RecomputeBatchnormStats();
}
}

void NnetCombiner::RecomputeBatchnormStats() {
KALDI_LOG << "Recomputing batch-norm stats on nnet.";
NnetTrainerOptions train_opts;
train_opts.train = false;
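// As in the chain combiner, train == false makes the trainer do only forward
// propagation, which refreshes the batch-norm stats without changing the
// model parameters; see NnetTrainer::Train().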
NnetTrainer trainer(train_opts, &nnet_);
for (size_t i = 0; i < egs_.size(); i++)
trainer.Train(egs_[i]);
}


2 changes: 2 additions & 0 deletions src/nnet3/nnet-combine.h
@@ -240,6 +240,8 @@ class NnetCombiner {

void ComputeUpdatableComponentDims();
void FinishPreprocessingInput();
void RecomputeBatchnormStats();


};

15 changes: 11 additions & 4 deletions src/nnet3/nnet-training.cc
@@ -55,7 +55,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config,


void NnetTrainer::Train(const NnetExample &eg) {
bool need_model_derivative = true;
bool need_model_derivative = config_.train;
ComputationRequest request;
GetComputationRequest(*nnet_, eg, need_model_derivative,
config_.store_component_stats,
@@ -69,9 +69,16 @@
computer.Run();

this->ProcessOutputs(eg, &computer);
computer.Run();

UpdateParamsWithMaxChange();
if (config_.train) {
computer.Run();

UpdateParamsWithMaxChange();
} else {
// all parameter derivs will be zero; here we're just adding the stored stats.
AddNnet(*delta_nnet_, 1.0, nnet_);
ScaleNnet(0.0, delta_nnet_);
}
}

void NnetTrainer::ProcessOutputs(const NnetExample &eg,
@@ -85,7 +92,7 @@ void NnetTrainer::ProcessOutputs(const NnetExample &eg,
if (nnet_->IsOutputNode(node_index)) {
ObjectiveType obj_type = nnet_->GetNode(node_index).u.objective_type;
BaseFloat tot_weight, tot_objf;
bool supply_deriv = true;
bool supply_deriv = config_.train;
ComputeObjectiveFunction(io.features, obj_type, io.name,
supply_deriv, computer,
&tot_weight, &tot_objf);
6 changes: 6 additions & 0 deletions src/nnet3/nnet-training.h
@@ -33,6 +33,7 @@ namespace nnet3 {
struct NnetTrainerOptions {
bool zero_component_stats;
bool store_component_stats;
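// If 'train' is false, Train() does only the forward propagation (which still
// updates stored stats, e.g. for batch-norm components) and skips the
// parameter update; registered as the --train option below.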
bool train;
int32 print_interval;
bool debug_computation;
BaseFloat momentum;
@@ -46,6 +47,7 @@
NnetTrainerOptions():
zero_component_stats(true),
store_component_stats(true),
train(true),
print_interval(100),
debug_computation(false),
momentum(0.0),
@@ -76,6 +78,10 @@ struct NnetTrainerOptions {
"write the cached computation to");
opts->Register("binary-write-cache", &binary_write_cache, "Write "
"computation cache in binary mode");
opts->Register("train", &train, "If true, actually do the training "
"(if false, it will do only the forward propagation, "
"which affects stored stats for batch-norm, among other "
"things.)");

// register the optimization options with the prefix "optimization".
ParseOptions optimization_opts("optimization", opts);
10 changes: 10 additions & 0 deletions src/nnet3/nnet-utils.cc
@@ -469,6 +469,16 @@ void SetDropoutProportion(BaseFloat dropout_proportion,
}
}

bool HasBatchnorm(const Nnet &nnet) {
for (int32 c = 0; c < nnet.NumComponents(); c++) {
const Component *comp = nnet.GetComponent(c);
const BatchNormComponent *bc =
dynamic_cast<const BatchNormComponent*>(comp);
if (bc != NULL)
return true;
}
return false;
}

void SetBatchnormTestMode(bool test_mode, Nnet *nnet) {
for (int32 c = 0; c < nnet->NumComponents(); c++) {