Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions egs/swbd/s5c/local/xvector/train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@
set -e

stage=1
train_stage=1
train_stage=-10
generate_alignments=true # false if doing ctc training
speed_perturb=true

init_lr=0.003
final_lr=0.0003
max_change=2.0
use_gpu=true
feat_dim=40 # this is the MFCC dim we use in the hires features. you can't change it
# unless you change local/xvector/prepare_perturbed_data.sh to use a different
# MFCC config with a different dimension.
data=data/train_nodup_sp_hires # you can't change this without changing
# local/xvector/prepare_perturbed_data.sh
xvector_dim=200 # dimension of the xVector. configurable.
xvector_dir=exp/xvector_a
egs_dir=exp/xvector_a/egs


. ./path.sh
Expand All @@ -40,18 +44,21 @@ if [ $stage -le 3 ]; then
$xvector_dir/nnet.config
fi

if [ $stage -le 4 ]; then
if [ $stage -le 4 ] && [ -z "$egs_dir" ]; then
# dump egs.
steps/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \
"$data" $xvector_dir/egs
"$data" $egs_dir
fi

if [ $stage -le 5 ]; then
# training for 4 epochs * 3 shifts means we see each eg 12
# times (3 different frame-shifts of the same eg are counted as different).
steps/nnet3/xvector/train.sh --cmd "$train_cmd" \
--num-epochs 4 --num-shifts 3 \
--num-jobs-initial 2 --num-jobs-final 8 \
--num-epochs 4 --num-shifts 3 --use-gpu $use_gpu --stage $train_stage \
--initial-effective-lrate $init_lr --final-effective-lrate $final_lr \
--num-jobs-initial 1 --num-jobs-final 8 \
--max-param-change $max_change \
--egs-dir $egs_dir \
$xvector_dir
fi

Expand Down
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/xvector/make_jesus_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def WriteConfigs(self, f):
# just have an affine component for the first hidden layer.
# we don't need a nonlinearity as there is one at the input of
# the jesus component.
print('component name=x-affine1 type=AffineComponent '
print('component name=x-affine1 type=NaturalGradientAffineComponent '
'input-dim={0} output-dim={1} bias-stddev=0'.format(
cur_dim, args.jesus_input_dim), file=f)
print('component-node name=x-affine1 component=x-affine1 input={0}'.format(
Expand Down
11 changes: 5 additions & 6 deletions egs/wsj/s5/steps/nnet3/xvector/train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ cmd=run.pl
num_epochs=4 # Number of epochs of training;
# the number of iterations is worked out from this.
num_shifts=3
initial_effective_lrate=0.0003
final_effective_lrate=0.00003
initial_effective_lrate=0.003
final_effective_lrate=0.0003
num_jobs_initial=2 # Number of neural net jobs to run in parallel at the start of training
num_jobs_final=8 # Number of neural net jobs to run in parallel at the end of training
stage=-3
Expand Down Expand Up @@ -129,7 +129,7 @@ while [ $x -lt $num_iters ]; do

if [ $stage -le $x ]; then
echo "On iteration $x, learning rate is $this_learning_rate"

raw="nnet3-copy --learning-rate=$this_learning_rate $dir/$x.raw - |"
# Set off jobs doing some diagnostics, in the background.
# Use the egs dir from the previous iteration for the diagnostics
$cmd JOB=1:$num_diagnostic_archives $dir/log/compute_prob_valid.$x.JOB.log \
Expand All @@ -142,7 +142,7 @@ while [ $x -lt $num_iters ]; do
if [ $x -gt 0 ]; then
$cmd $dir/log/progress.$x.log \
nnet3-info $dir/$x.raw '&&' \
nnet3-show-progress --use-gpu=no $dir/$[$x-1].raw $dir/$x.raw &
nnet3-show-progress --use-gpu=no $dir/$[$x-1].raw $dir/$x.raw &
fi

echo "Training neural net (pass $x)"
Expand Down Expand Up @@ -174,8 +174,7 @@ while [ $x -lt $num_iters ]; do

$cmd $train_queue_opt $dir/log/train.$x.$n.log \
nnet3-xvector-train $parallel_train_opts --print-interval=10 \
--max-param-change=$max_param_change \
$dir/$x.raw \
--max-param-change=$max_param_change "$raw" \
"ark:nnet3-copy-egs ark:$egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --measure-output-frames=false --minibatch-size=$minibatch_size --discard-partial-minibatches=true ark:- ark:- |" \
$dir/$[$x+1].$n.raw || touch $dir/.error &
done
Expand Down
40 changes: 28 additions & 12 deletions src/xvector/nnet-xvector-training.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,14 @@ NnetXvectorTrainer::NnetXvectorTrainer(const NnetTrainerOptions &config,
nnet_(nnet),
compiler_(*nnet, config_.optimize_config),
num_minibatches_processed_(0) {
if (config.zero_component_stats)
if (config_.zero_component_stats)
ZeroComponentStats(nnet);
if (config.momentum == 0.0 && config.max_param_change == 0.0) {
if (config_.momentum == 0.0 &&
config_.max_param_change == 0.0) {
delta_nnet_= NULL;
} else {
KALDI_ASSERT(config.momentum >= 0.0 &&
config.max_param_change >= 0.0);
KALDI_ASSERT(config_.momentum >= 0.0 &&
config_.max_param_change >= 0.0);
delta_nnet_ = nnet_->Copy();
bool is_gradient = false; // setting this to true would disable the
// natural-gradient updates.
Expand Down Expand Up @@ -94,7 +95,8 @@ void NnetXvectorTrainer::Train(const NnetExample &eg) {
ScaleNnet(config_.momentum, delta_nnet_);
}
if (config_.write_cache != "") {
Output ko(config_.write_cache, config_.binary_write_cache);
Output ko(config_.write_cache,
config_.binary_write_cache);
compiler_.WriteCache(ko.Stream(), config_.binary_write_cache);
}
}
Expand Down Expand Up @@ -143,7 +145,8 @@ void NnetXvectorTrainer::ProcessOutputs(NnetComputer *computer) {
computer->AcceptOutputDeriv(b_name, &deriv_b_mat);
}

objf_info_[xvector_name].UpdateStats(xvector_name, config_.print_interval,
objf_info_[xvector_name].UpdateStats(xvector_name,
config_.print_interval,
num_minibatches_processed_++,
tot_weight, tot_objf);
}
Expand Down Expand Up @@ -246,7 +249,7 @@ void GetComputationRequestXvector(const Nnet &nnet,
request->need_model_derivative = need_model_derivative;
request->store_component_stats = store_component_stats;

// xvector-egs have multiple inputs(e.g. different inputs correspond
// xvector-egs have multiple inputs (e.g. different inputs correspond
// to different chunks) and no outputs.
for (size_t i = 0; i < eg.io.size(); i++) {
const NnetIo &io = eg.io[i];
Expand All @@ -263,21 +266,34 @@ void GetComputationRequestXvector(const Nnet &nnet,
IoSpecification &io_spec = dest.back();
io_spec.name = name;
io_spec.indexes = io.indexes;
io_spec.has_deriv = nnet.IsOutputNode(node_index) && need_model_derivative;
io_spec.has_deriv = false;
}

// We only need the output on frame t=0 for each n, so the output indexes
// for the output node are (n, 0, 0), with n ranging from 0 up to the
// minimum, taken over the distinct t values in the input indexes, of the
// number of n-values present for that t.
// The indexes for the "s" and "b" output nodes are equal to (0, 0, 0).
int32 io_index_size = request->inputs[0].indexes.size(),
n_indx_size = 0;
n_indx_size = 1e6, t_ind;
std::vector<Index> output_indexes,
affine_output_indexes;
affine_output_indexes.resize(1);
affine_output_indexes[0].n = 0;
affine_output_indexes[0].t = 0;

std::map<int32, int32> n_indx_sizes;
for (int32 indx = 0; indx < io_index_size; indx++) {
t_ind = request->inputs[0].indexes[indx].t;
if (n_indx_sizes.count(t_ind) != 0)
n_indx_sizes[t_ind] += 1;
else
n_indx_sizes.insert(std::make_pair(t_ind, 1));
}
std::map<int32, int32>::const_iterator iter;
for (iter = n_indx_sizes.begin(); iter != n_indx_sizes.end(); iter++)
n_indx_size = std::min(n_indx_size, iter->second);

for (int32 indx = 0; indx < io_index_size; indx++)
if (request->inputs[0].indexes[indx].t == 0)
n_indx_size++;

output_indexes.resize(n_indx_size);
for (int32 indx = 0; indx < n_indx_size; indx++) {
Expand Down
3 changes: 3 additions & 0 deletions src/xvector/xvector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ void ComputeXvectorObjfAndDeriv(
KALDI_ASSERT(deriv_xvector->NumCols() == xvector_dim);
KALDI_ASSERT(deriv_xvector->NumRows() == N);
KALDI_ASSERT(deriv_S->Dim() == S_dim);
deriv_xvector->Set(0.0);
deriv_S->Set(0.0);
(*deriv_b) = 0.0;
}

CuMatrix<BaseFloat> S_tmp(S),
Expand Down