From 92ad8baaafe6e7b24cdf86f5fad7967e8e9faab6 Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Wed, 12 Oct 2016 05:01:53 -0400 Subject: [PATCH 1/9] Augmentation recipe for swbd --- .../chain/multi_condition/run_tdnn_7b.sh | 226 ++++++++++++++++++ .../nnet3/multi_condition/copy_ali_dir.sh | 78 ++++++ .../multi_condition/run_ivector_common.sh | 148 ++++++++++++ 3 files changed, 452 insertions(+) create mode 100755 egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh create mode 100755 egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh create mode 100755 egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh new file mode 100755 index 00000000000..7a8c08970e2 --- /dev/null +++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh @@ -0,0 +1,226 @@ +#!/bin/bash + +set -e + +# configs for 'chain' +affix= +stage=1 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_7b # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +iv_dir=exp/nnet3_rvb +num_data_reps=1 + +# TDNN options +# this script uses the new tdnn config generator so it needs a final 0 to reflect that the final layer input has no splicing +# smoothing options +pool_window= +pool_type='none' +pool_lpfilter_width= +self_repair_scale=0.00001 +# training options +num_epochs=2 +initial_effective_lrate=0.001 +final_effective_lrate=0.0001 +leftmost_questions_truncate=-1 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=3 +num_jobs_final=16 +minibatch_size=128 +relu_dim=625 +frames_per_eg=150 +remove_egs=true +common_egs_dir= +xent_regularize=0.1 + + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <> $rvb_lat_dir/temp/combined_lats.scp + done + sort -u $rvb_lat_dir/temp/combined_lats.scp > $rvb_lat_dir/temp/combined_lats_sorted.scp + + lattice-copy scp:$rvb_lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$rvb_lat_dir/lat.1.gz" || exit 1; + echo "1" > $rvb_lat_dir/num_jobs + + # copy other files from original lattice dir + for f in cmvn_opts final.mdl splice_opts tree; do + cp $lat_dir/$f $rvb_lat_dir/$f + done +fi + + +if [ $stage -le 10 ]; then + # Create a version of the lang/ directory that has one state per phone in the + # topo file. [note, it really has two states.. the first one is only repeated + # once, the second one has zero or more repeats.] + rm -rf $lang + cp -r data/lang $lang + silphonelist=$(cat $lang/phones/silence.csl) || exit 1; + nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1; + # Use our special topology... note that later on may have to tune this + # topology. + steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + # we build the tree using clean features (data/train) rather than + # the augmented features (data/train_rvb) to get better alignments + + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 9000 data/train_nodup${suffix} $lang exp/tri4_ali_nodup${suffix} $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs"; + if [ ! 
-z "$relu_dim" ]; then + dim_opts="--relu-dim $relu_dim" + else + dim_opts="--pnorm-input-dim $pnorm_input_dim --pnorm-output-dim $pnorm_output_dim" + fi + + # create the config files for nnet initialization + pool_opts= + pool_opts=$pool_opts${pool_type:+" --pool-type $pool_type "} + pool_opts=$pool_opts${pool_window:+" --pool-window $pool_window "} + pool_opts=$pool_opts${pool_lpfilter_width:+" --pool-lpfilter-width $pool_lpfilter_width "} + repair_opts=${self_repair_scale:+" --self-repair-scale $self_repair_scale "} + + steps/nnet3/tdnn/make_configs.py $pool_opts \ + $repair_opts \ + --feat-dir data/${train_set}_hires \ + --ivector-dir $iv_dir/ivectors_${train_set} \ + --tree-dir $treedir \ + $dim_opts \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize $xent_regularize \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target $final_layer_normalize_target \ + $dir/configs || exit 1; +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $iv_dir/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir $rvb_lat_dir \ + --dir $dir || exit 1; + +fi + +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 14 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 30 --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $iv_dir/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh b/egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh new file mode 100755 index 00000000000..42ea2dc4b9d --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# Copyright 2014 Johns Hopkins University (author: Vijayaditya Peddinti) +# Apache 2.0 + +# This script operates on a directory, such as in exp/tri4a_ali, +# that contains some subset of the following files: +# ali.*.gz +# tree +# cmvn_opts +# splice_opts +# num_jobs +# final.mdl +# It copies to another directory, possibly adding a specified prefix or a suffix +# to the utterance names. + + +# begin configuration section +utt_prefix= +utt_suffix= +cmd=run.pl +# end configuration section + +. utils/parse_options.sh + +if [ $# != 2 ]; then + echo "Usage: " + echo " $0 [options] " + echo "e.g.:" + echo " $0 --utt-prefix=1- exp/tri4a_ali exp/tri4a_rev1_ali" + echo "Options" + echo " --utt-prefix= # Prefix for utterance ids, default empty" + echo " --utt-suffix= # Suffix for utterance ids, default empty" + exit 1; +fi + + +export LC_ALL=C + +src_dir=$1 +dest_dir=$2 + +mkdir -p $dest_dir + +if [ ! -f $src_dir/ali.1.gz ]; then + echo "copy_ali_dir.sh: no such files $src_dir/ali.*.gz" + exit 1; +fi + +for f in tree cmvn_opts splice_opts num_jobs final.mdl; do + if [ ! -f $src_dir/$f ]; then + echo "copy_ali_dir.sh: no such file $src_dir/$f this might be serious error." + continue + fi + cp $src_dir/$f $dest_dir/ +done + +nj=$(cat $dest_dir/num_jobs) +mkdir -p $dest_dir/temp +cat << EOF > $dest_dir/temp/copy_ali.sh +set -e; +id=\$1 +echo "$src_dir/ali.\$id.gz" +gunzip -c $src_dir/ali.\$id.gz | \ + copy-int-vector ark:- ark,t:- | \ +python -c " +import sys +for line in sys.stdin: + parts = line.split() + print '$utt_prefix{0}$utt_suffix {1}'.format(parts[0], ' '.join(parts[1:])) +" | \ + gzip -c >$dest_dir/ali.\$id.gz || exit 1; +set +o pipefail; # unset the pipefail option. +EOF +chmod +x $dest_dir/temp/copy_ali.sh +$cmd -v PATH JOB=1:$nj $dest_dir/temp/copy_ali.JOB.log $dest_dir/temp/copy_ali.sh JOB || exit 1; + +echo "$0: copied alignments from $src_dir to $dest_dir" diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh new file mode 100755 index 00000000000..126bf17b557 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh @@ -0,0 +1,148 @@ +#!/bin/bash +#set -e +# this script is based on local/nnet3/run_ivector_common.sh +# but it operates on corrupted training/dev/test data sets + +. cmd.sh + +stage=1 +foreground_snrs="20:10:15:5:0" +background_snrs="20:10:15:5:0" +num_data_reps=1 +clean_data_dir=train_nodup_sp +iv_dir=exp/nnet3_rvb + +set -e +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +mkdir -p $iv_dir +train_set=${clean_data_dir}_rvb${num_data_reps} + +if [ $stage -le 1 ]; then + if [ ! -d "RIRS_NOISES" ]; then + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + + # corrupt the data to generate reverberated data + python steps/data/reverberate_data_dir.py \ + --prefix "rev" \ + --rir-set-parameters "0.25, RIRS_NOISES/simulated_rirs/smallroom/rir_list" \ + --rir-set-parameters "0.25, RIRS_NOISES/simulated_rirs/mediumroom/rir_list" \ + --rir-set-parameters "0.25, RIRS_NOISES/simulated_rirs/largeroom/rir_list" \ + --rir-set-parameters "0.25, RIRS_NOISES/real_rirs_isotropic_noises/rir_list" \ + --foreground-snrs $foreground_snrs \ + --background-snrs $background_snrs \ + --speech-rvb-probability 1 \ + --pointsource-noise-addition-probability 1 \ + --isotropic-noise-addition-probability 1 \ + --num-replications $num_data_reps \ + --max-noises-per-minute 1 \ + --source-sampling-rate 8000 \ + data/${clean_data_dir} data/${train_set} +fi + + +if [ $stage -le 2 ]; then + mfccdir=mfcc_rvb + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + date=$(date +'%m_%d_%H_%M') + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage + fi + + for dataset in $train_set; do + utils/copy_data_dir.sh data/$dataset data/${dataset}_hires + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${dataset}_hires + + steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir; + steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir; + utils/fix_data_dir.sh data/${dataset}_hires; + done +fi + + +# ivector extractor training +if [ $stage -le 5 ]; then + # Here we want to build a 200k system, half from the reverberated set and half from the original set + local/nnet3/multi_condition/copy_ali_dir.sh --utt-prefix "rev1_sp1.0-" exp/tri2_ali_100k_nodup exp/tri2_ali_100k_nodup_rvb || exit 1; + local/nnet3/multi_condition/copy_ali_dir.sh --utt-prefix "rev0_sp1.0-" exp/tri2_ali_100k_nodup exp/tri2_ali_100k_nodup_clean || exit 1; + + # want the 100k subset to exactly match train_100k, since we'll use its alignments. + awk -v p='rev1_sp1.0-' '{printf "%s%s\n", p, $1}' data/train_100k_nodup/utt2spk > uttlist + utils/subset_data_dir.sh --utt-list uttlist \ + data/${train_set}_hires data/${train_set}_100k_hires + rm uttlist + + # Mix the 100k original data and the 100k reverberated data + utils/copy_data_dir.sh --spk-prefix "rev0_sp1.0-" --utt-prefix "rev0_sp1.0-" data/train_100k_nodup_hires data/train_100k_nodup_hires_tmp + utils/combine_data.sh data/${train_set}_200k_mix_hires data/train_100k_nodup_hires_tmp data/${train_set}_100k_hires + rm -r data/train_100k_nodup_hires_tmp + + # combine the alignment for mixed data + steps/combine_ali_dirs.sh --num-jobs 30 data/${train_set}_200k_mix_hires exp/tri2_ali_200k_mix exp/tri2_ali_100k_nodup_clean exp/tri2_ali_100k_nodup_rvb || exit 1; + rm -r exp/tri2_ali_100k_nodup_clean exp/tri2_ali_100k_nodup_rvb + + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. 
We use --num-iters 13 because after we get + # the transform (12th iter is the last), any further training is pointless. + # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + 5500 90000 data/${train_set}_200k_mix_hires \ + data/lang_nosp exp/tri2_ali_200k_mix $iv_dir/tri3b +fi + +if [ $stage -le 6 ]; then + utils/copy_data_dir.sh --spk-prefix "rev0_" --utt-prefix "rev0_" data/${clean_data_dir}_30k_nodup_hires data/${clean_data_dir}_30k_nodup_hires_tmp + # want the reverberated 30k subset to exactly match clean 30k, since we'll use its alignments. + awk -v p='rev1_' '{printf "%s%s\n", p, $1}' data/${clean_data_dir}_30k_nodup_hires/utt2spk > uttlist + utils/subset_data_dir.sh --utt-list uttlist \ + data/${train_set}_hires data/${train_set}_30k_hires + rm uttlist + + # Mix the 30k original data and the 30k reverberated data + utils/combine_data.sh data/${train_set}_60k_mix_hires data/${clean_data_dir}_30k_nodup_hires_tmp data/${train_set}_30k_hires + rm -r data/${clean_data_dir}_30k_nodup_hires_tmp + + # To train a diagonal UBM we don't need very much data, so use the smallest subset. + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ + data/${train_set}_60k_mix_hires 512 $iv_dir/tri3b $iv_dir/diag_ubm +fi + +if [ $stage -le 7 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${train_set}_200k_mix_hires $iv_dir/diag_ubm $iv_dir/extractor || exit 1; +fi + +if [ $stage -le 8 ]; then + # We extract iVectors on all the train_nodup data, which will be what we + # train the system on. + # handle per-utterance decoding well (iVector starts at zero). 
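
The mixing in the stages above and below leans on the utterance-id scheme of
steps/data/reverberate_data_dir.py: copy number i of utterance u is named
${prefix}${i}_${u}, and the recipe manufactures matching "rev0_" names for the
clean copies with copy_data_dir.sh / copy_ali_dir.sh. A minimal sketch of the
composition (compose_id is a hypothetical helper, and the utterance id is
illustrative, not taken from the patch):

    compose_id() { echo "${1}${2}_${3}"; }  # compose_id <prefix> <copy-index> <utt-id>
    compose_id rev 1 sp1.0-sw02001-A_000098-001156
    # -> rev1_sp1.0-sw02001-A_000098-001156  (reverberated copy made by the script)
    compose_id rev 0 sp1.0-sw02001-A_000098-001156
    # -> rev0_sp1.0-sw02001-A_000098-001156  (clean copy named via copy_data_dir.sh)

This is why copy_ali_dir.sh is invoked above with --utt-prefix "rev1_sp1.0-":
the copied clean alignments then carry exactly the utterance ids of the
reverberated 100k subset.
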
+ + # Mix all the original data and all the reverberated data + utils/copy_data_dir.sh --spk-prefix "rev0_" --utt-prefix "rev0_" data/${clean_data_dir}_hires data/${clean_data_dir}_hires_clean + utils/combine_data.sh data/${train_set}_mix_hires data/${clean_data_dir}_hires_clean data/${train_set}_hires + + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_mix_hires data/${train_set}_mix_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${train_set}_mix_max2_hires $iv_dir/extractor $iv_dir/ivectors_${train_set}_mix || exit 1; + + for data_set in eval2000; do + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${data_set}_hires $iv_dir/extractor $iv_dir/ivectors_$data_set || exit 1; + done +fi + +exit 0; + From 0bcf41ee1a8a6b9fb4ed9a16c130daf139b66992 Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Wed, 12 Oct 2016 21:44:22 -0400 Subject: [PATCH 2/9] result added --- egs/swbd/s5c/RESULTS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/egs/swbd/s5c/RESULTS b/egs/swbd/s5c/RESULTS index 6223c4ca319..471e088ffba 100644 --- a/egs/swbd/s5c/RESULTS +++ b/egs/swbd/s5c/RESULTS @@ -185,6 +185,13 @@ exit 0 %WER 10.4 | 1831 21395 | 90.7 6.1 3.2 1.2 10.4 44.6 | exp/chain/tdnn_7d_sp/decode_eval2000_sw1_fsh_fg/score_11_1.0/eval2000_hires.ctm.swbd.filt.sys %WER 11.6 | 1831 21395 | 89.7 7.0 3.3 1.4 11.6 47.0 | exp/chain/tdnn_7d_sp/decode_eval2000_sw1_tg/score_10_1.0/eval2000_hires.ctm.swbd.filt.sys + +# results with chain TDNNs (2 epoch training on data reverberated with room impulse responses) (see local/chain/multi_condition/run_tdnn_7b.sh) +%WER 10.0 | 1831 21395 | 91.0 6.0 3.0 1.1 10.0 43.8 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.5/eval2000_hires.ctm.swbd.filt.sys +%WER 20.0 | 2628 21594 | 82.1 11.7 6.2 2.1 20.0 55.6 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys +%WER 15.0 | 4459 42989 | 86.5 8.8 4.7 1.6 15.0 50.7 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.5/eval2000_hires.ctm.filt.sys + + # current best 'chain' models with LSTM (see local/chain/run_lstm_d.sh) %WER 15.9 | 4459 42989 | 86.0 9.6 4.3 2.0 15.9 51.7 | exp/chain/lstm_d_ld5_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.filt.sys %WER 10.5 | 1831 21395 | 90.8 6.4 2.9 1.3 10.5 44.3 | exp/chain/lstm_d_ld5_sp/decode_eval2000_sw1_fsh_fg/score_10_1.0/eval2000_hires.ctm.swbd.filt.sys From 178c9d1aa7d91117afe1faed04cbab80cce92096 Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Mon, 24 Oct 2016 22:55:08 -0400 Subject: [PATCH 3/9] remove copy_ali_dir.sh; add --include-original-data to reverberate script; modify swbd-rvb script --- egs/swbd/s5c/RESULTS | 6 +- .../chain/multi_condition/run_tdnn_7b.sh | 41 ++++--- .../nnet3/multi_condition/copy_ali_dir.sh | 78 ------------- .../multi_condition/run_ivector_common.sh | 105 +++++++++--------- egs/wsj/s5/steps/data/reverberate_data_dir.py | 61 ++++++---- 5 files changed, 124 insertions(+), 167 deletions(-) delete mode 100755 egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh diff --git a/egs/swbd/s5c/RESULTS b/egs/swbd/s5c/RESULTS index 471e088ffba..e5bc3737c66 100644 --- a/egs/swbd/s5c/RESULTS +++ b/egs/swbd/s5c/RESULTS @@ -186,10 +186,10 @@ exit 0 %WER 11.6 | 1831 21395 | 89.7 7.0 3.3 1.4 11.6 47.0 | exp/chain/tdnn_7d_sp/decode_eval2000_sw1_tg/score_10_1.0/eval2000_hires.ctm.swbd.filt.sys -# results with chain TDNNs (2 epoch training on data reverberated with room 
impulse responses) (see local/chain/multi_condition/run_tdnn_7b.sh) -%WER 10.0 | 1831 21395 | 91.0 6.0 3.0 1.1 10.0 43.8 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.5/eval2000_hires.ctm.swbd.filt.sys -%WER 20.0 | 2628 21594 | 82.1 11.7 6.2 2.1 20.0 55.6 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys +# results with chain TDNNs (2 epoch training on data being speed-perturbed, volume-perturbed and reverberated with room impulse responses) (see local/chain/multi_condition/run_tdnn_7b.sh) %WER 15.0 | 4459 42989 | 86.5 8.8 4.7 1.6 15.0 50.7 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.5/eval2000_hires.ctm.filt.sys +%WER 10.0 | 1831 21395 | 91.0 6.0 3.0 1.1 10.0 43.8 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.5/eval2000_hires.ctm.swbd.filt.sys +%WER 20.0 | 2628 21594 | 82.1 11.7 6.2 2.1 20.0 55.6 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/sddcore_10_0.0/eval2000_hires.ctm.callhm.filt.sys # current best 'chain' models with LSTM (see local/chain/run_lstm_d.sh) diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh index 7a8c08970e2..19dd29eae16 100755 --- a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh +++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh @@ -4,14 +4,15 @@ set -e # configs for 'chain' affix= -stage=1 -train_stage=-10 +stage=12 +train_stage=$1 get_egs_stage=-10 speed_perturb=true dir=exp/chain/tdnn_7b # Note: _sp will get added to this if $speed_perturb == true. decode_iter= iv_dir=exp/nnet3_rvb num_data_reps=1 +clean_train_set=train_nodup # TDNN options # this script uses the new tdnn config generator so it needs a final 0 to reflect that the final layer input has no splicing @@ -57,30 +58,40 @@ fi # nnet3 setup, and you can skip them by setting "--stage 8" if you have already # run those things. + +# if we are using the speed-perturbed data we need to generate +# alignments for it. +# Also the data reverberation will be done in this script/ +echo local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \ + --clean-data-dir ${clean_train_set} \ + --iv-dir $iv_dir \ + --speed-perturb $speed_perturb \ + --num-data-reps $num_data_reps || exit 1; + + if [ "$speed_perturb" == "true" ]; then suffix=_sp fi -dir=${dir}${affix:+_$affix}${suffix}_rvb${num_data_reps}_mix -train_set=train_nodup${suffix}_rvb${num_data_reps}_mix +clean_train_set=${clean_train_set}${suffix} +dir=${dir}${affix:+_$affix}${suffix}_rvb${num_data_reps} +train_set=${clean_train_set}${suffix}_rvb${num_data_reps} lang=data/lang_chain_2y treedir=exp/chain/tri5_2y_tree${suffix} lat_dir=exp/tri4_lats_nodup${suffix} -rvb_lat_dir=${lat_dir}_rvb${num_data_reps}_mix - - -# if we are using the speed-perturbed data we need to generate -# alignments for it. -echo local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \ - --clean-data-dir train_nodup${suffix} \ - --iv-dir $iv_dir \ - --num-data-reps $num_data_reps || exit 1; +rvb_lat_dir=${lat_dir}_rvb${num_data_reps} if [ $stage -le 9 ]; then # Get the alignments as lattices (gives the CTC training more freedom). 
# use the same num-jobs as the alignments - + nj=$(cat exp/tri4_ali_nodup${suffix}/num_jobs) || exit 1; + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/${clean_data_dir} \ + data/lang exp/tri4 $lat_dir + rm $lat_dir/fsts.*.gz # save space + + + # Create the lattices for the reverberated data mkdir -p $rvb_lat_dir/temp/ lattice-copy "ark:gunzip -c $lat_dir/lat.*.gz |" ark,scp:$rvb_lat_dir/temp/lats.ark,$rvb_lat_dir/temp/lats.scp @@ -157,7 +168,6 @@ if [ $stage -le 12 ]; then fi - if [ $stage -le 13 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ @@ -191,7 +201,6 @@ if [ $stage -le 13 ]; then --tree-dir $treedir \ --lat-dir $rvb_lat_dir \ --dir $dir || exit 1; - fi if [ $stage -le 13 ]; then diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh b/egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh deleted file mode 100755 index 42ea2dc4b9d..00000000000 --- a/egs/swbd/s5c/local/nnet3/multi_condition/copy_ali_dir.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Vijayaditya Peddinti) -# Apache 2.0 - -# This script operates on a directory, such as in exp/tri4a_ali, -# that contains some subset of the following files: -# ali.*.gz -# tree -# cmvn_opts -# splice_opts -# num_jobs -# final.mdl -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance names. - - -# begin configuration section -utt_prefix= -utt_suffix= -cmd=run.pl -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --utt-prefix=1- exp/tri4a_ali exp/tri4a_rev1_ali" - echo "Options" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -src_dir=$1 -dest_dir=$2 - -mkdir -p $dest_dir - -if [ ! -f $src_dir/ali.1.gz ]; then - echo "copy_ali_dir.sh: no such files $src_dir/ali.*.gz" - exit 1; -fi - -for f in tree cmvn_opts splice_opts num_jobs final.mdl; do - if [ ! -f $src_dir/$f ]; then - echo "copy_ali_dir.sh: no such file $src_dir/$f this might be serious error." - continue - fi - cp $src_dir/$f $dest_dir/ -done - -nj=$(cat $dest_dir/num_jobs) -mkdir -p $dest_dir/temp -cat << EOF > $dest_dir/temp/copy_ali.sh -set -e; -id=\$1 -echo "$src_dir/ali.\$id.gz" -gunzip -c $src_dir/ali.\$id.gz | \ - copy-int-vector ark:- ark,t:- | \ -python -c " -import sys -for line in sys.stdin: - parts = line.split() - print '$utt_prefix{0}$utt_suffix {1}'.format(parts[0], ' '.join(parts[1:])) -" | \ - gzip -c >$dest_dir/ali.\$id.gz || exit 1; -set +o pipefail; # unset the pipefail option. -EOF -chmod +x $dest_dir/temp/copy_ali.sh -$cmd -v PATH JOB=1:$nj $dest_dir/temp/copy_ali.JOB.log $dest_dir/temp/copy_ali.sh JOB || exit 1; - -echo "$0: copied alignments from $src_dir to $dest_dir" diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh index 126bf17b557..e99a7168c87 100755 --- a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh +++ b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh @@ -9,8 +9,9 @@ stage=1 foreground_snrs="20:10:15:5:0" background_snrs="20:10:15:5:0" num_data_reps=1 -clean_data_dir=train_nodup_sp +clean_data_dir=train_nodup iv_dir=exp/nnet3_rvb +speed_perturb=true set -e . cmd.sh @@ -18,9 +19,45 @@ set -e . 
./utils/parse_options.sh mkdir -p $iv_dir -train_set=${clean_data_dir}_rvb${num_data_reps} -if [ $stage -le 1 ]; then +if [ "$speed_perturb" == "true" ]; then + # perturbed data preparation + if [ $stage -le 1 ] && [ ! -d data/${clean_data_dir}_sp ]; then + #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment + # _sp stands for speed-perturbed + + for datadir in ${clean_data_dir}; do + utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1 + utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2 + utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2 + utils/validate_data_dir.sh --no-feats data/${datadir}_tmp + rm -r data/temp1 data/temp2 + + mfccdir=mfcc_perturbed + steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \ + data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1; + utils/fix_data_dir.sh data/${datadir}_tmp + + utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0 + utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0 + utils/fix_data_dir.sh data/${datadir}_sp + rm -r data/temp0 data/${datadir}_tmp + done + fi + + + if [ $stage -le 2 ]; then + #obtain the alignment of the perturbed data + steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ + data/${clean_data_dir}_sp data/lang_nosp exp/tri4 exp/tri4_ali_nodup_sp || exit 1 + fi + + clean_data_dir=${clean_data_dir}_sp +fi + + +if [ $stage -le 3 ]; then if [ ! -d "RIRS_NOISES" ]; then # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip @@ -28,6 +65,7 @@ if [ $stage -le 1 ]; then fi # corrupt the data to generate reverberated data + # this script modifies wav.scp to include the reverberation commands, the real computation will be done at the feature extraction python steps/data/reverberate_data_dir.py \ --prefix "rev" \ --rir-set-parameters "0.25, RIRS_NOISES/simulated_rirs/smallroom/rir_list" \ @@ -42,18 +80,19 @@ if [ $stage -le 1 ]; then --num-replications $num_data_reps \ --max-noises-per-minute 1 \ --source-sampling-rate 8000 \ - data/${clean_data_dir} data/${train_set} + --include-original-data true \ + data/${clean_data_dir} data/${clean_data_dir}_rvb${num_data_reps} fi -if [ $stage -le 2 ]; then +if [ $stage -le 4 ]; then mfccdir=mfcc_rvb if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then date=$(date +'%m_%d_%H_%M') utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage fi - for dataset in $train_set; do + for dataset in ${clean_data_dir}_rvb${num_data_reps}; do utils/copy_data_dir.sh data/$dataset data/${dataset}_hires # do volume-perturbation on the training data prior to extracting hires @@ -70,58 +109,28 @@ fi # ivector extractor training if [ $stage -le 5 ]; then - # Here we want to build a 200k system, half from the reverberated set and half from the original set - local/nnet3/multi_condition/copy_ali_dir.sh --utt-prefix "rev1_sp1.0-" exp/tri2_ali_100k_nodup exp/tri2_ali_100k_nodup_rvb || exit 1; - local/nnet3/multi_condition/copy_ali_dir.sh --utt-prefix "rev0_sp1.0-" exp/tri2_ali_100k_nodup exp/tri2_ali_100k_nodup_clean || exit 1; - - # want the 100k subset to exactly match train_100k, since we'll use its alignments. 
- awk -v p='rev1_sp1.0-' '{printf "%s%s\n", p, $1}' data/train_100k_nodup/utt2spk > uttlist - utils/subset_data_dir.sh --utt-list uttlist \ - data/${train_set}_hires data/${train_set}_100k_hires - rm uttlist - - # Mix the 100k original data and the 100k reverberated data - utils/copy_data_dir.sh --spk-prefix "rev0_sp1.0-" --utt-prefix "rev0_sp1.0-" data/train_100k_nodup_hires data/train_100k_nodup_hires_tmp - utils/combine_data.sh data/${train_set}_200k_mix_hires data/train_100k_nodup_hires_tmp data/${train_set}_100k_hires - rm -r data/train_100k_nodup_hires_tmp - - # combine the alignment for mixed data - steps/combine_ali_dirs.sh --num-jobs 30 data/${train_set}_200k_mix_hires exp/tri2_ali_200k_mix exp/tri2_ali_100k_nodup_clean exp/tri2_ali_100k_nodup_rvb || exit 1; - rm -r exp/tri2_ali_100k_nodup_clean exp/tri2_ali_100k_nodup_rvb - - # We need to build a small system just because we need the LDA+MLLT transform - # to train the diag-UBM on top of. We use --num-iters 13 because after we get - # the transform (12th iter is the last), any further training is pointless. - # this decision is based on fisher_english steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ --splice-opts "--left-context=3 --right-context=3" \ - 5500 90000 data/${train_set}_200k_mix_hires \ - data/lang_nosp exp/tri2_ali_200k_mix $iv_dir/tri3b + 5500 90000 data/train_100k_nodup_hires \ + data/lang_nosp exp/tri2_ali_100k_nodup $iv_dir/tri3b fi -if [ $stage -le 6 ]; then - utils/copy_data_dir.sh --spk-prefix "rev0_" --utt-prefix "rev0_" data/${clean_data_dir}_30k_nodup_hires data/${clean_data_dir}_30k_nodup_hires_tmp - # want the reverberated 30k subset to exactly match clean 30k, since we'll use its alignments. - awk -v p='rev1_' '{printf "%s%s\n", p, $1}' data/${clean_data_dir}_30k_nodup_hires/utt2spk > uttlist - utils/subset_data_dir.sh --utt-list uttlist \ - data/${train_set}_hires data/${train_set}_30k_hires - rm uttlist - - # Mix the 30k original data and the 30k reverberated data - utils/combine_data.sh data/${train_set}_60k_mix_hires data/${clean_data_dir}_30k_nodup_hires_tmp data/${train_set}_30k_hires - rm -r data/${clean_data_dir}_30k_nodup_hires_tmp +train_set=${clean_data_dir}_rvb${num_data_reps} +if [ $stage -le 6 ]; then # To train a diagonal UBM we don't need very much data, so use the smallest subset. + utils/subset_data_dir.sh data/${train_set}_hires 30000 data/${train_set}_30k_hires steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ - data/${train_set}_60k_mix_hires 512 $iv_dir/tri3b $iv_dir/diag_ubm + data/${train_set}_30k_hires 512 $iv_dir/tri3b $iv_dir/diag_ubm fi if [ $stage -le 7 ]; then # iVector extractors can be sensitive to the amount of data, but this one has a # fairly small dim (defaults to 100) so we don't use all of it, we use just the # 100k subset (just under half the data). + utils/subset_data_dir.sh data/${train_set}_hires 100000 data/${train_set}_100k_hires steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ - data/${train_set}_200k_mix_hires $iv_dir/diag_ubm $iv_dir/extractor || exit 1; + data/${train_set}_100k_hires $iv_dir/diag_ubm $iv_dir/extractor || exit 1; fi if [ $stage -le 8 ]; then @@ -129,14 +138,10 @@ if [ $stage -le 8 ]; then # train the system on. # handle per-utterance decoding well (iVector starts at zero). 
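
The --include-original-data true flag added earlier in this patch is what makes
the manual mixing removed below unnecessary: reverberate_data_dir.py now writes
the rev0_ (clean) and rev1_ (reverberated) copies into one output directory
itself, and it only rewrites the wav.scp pipelines, so the actual
wav-reverberate work happens lazily at feature extraction. A quick sanity check
of the combined directory might look like the sketch below (paths assume the
defaults used in this recipe, clean_data_dir=train_nodup_sp and
num_data_reps=1):

    clean=data/train_nodup_sp
    rvb=data/train_nodup_sp_rvb1
    n_clean=$(wc -l < $clean/utt2spk)
    n_rvb=$(wc -l < $rvb/utt2spk)
    echo "clean utts: $n_clean, output utts: $n_rvb (expect $((2 * n_clean)))"
    grep -c '^rev0_' $rvb/utt2spk   # unmodified copies kept by --include-original-data
    grep -c '^rev1_' $rvb/utt2spk   # reverberated copies
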
- # Mix all the original data and all the reverberated data - utils/copy_data_dir.sh --spk-prefix "rev0_" --utt-prefix "rev0_" data/${clean_data_dir}_hires data/${clean_data_dir}_hires_clean - utils/combine_data.sh data/${train_set}_mix_hires data/${clean_data_dir}_hires_clean data/${train_set}_hires - - steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_mix_hires data/${train_set}_mix_max2_hires + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ - data/${train_set}_mix_max2_hires $iv_dir/extractor $iv_dir/ivectors_${train_set}_mix || exit 1; + data/${train_set}_max2_hires $iv_dir/extractor $iv_dir/ivectors_${train_set} || exit 1; for data_set in eval2000; do steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 890213475cd..72679406213 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -69,6 +69,8 @@ def GetArgs(): parser.add_argument('--source-sampling-rate', type=int, default=None, help="Sampling rate of the source data. If a positive integer is specified with this option, " "the RIRs/noises will be resampled to the rate of the source data.") + parser.add_argument("--include-original-data", type=str, help="If true, the output data includes one copy of the original data", + choices=['true', 'false'], default = False) parser.add_argument("input_dir", help="Input data directory") parser.add_argument("output_dir", @@ -85,11 +87,11 @@ def CheckArgs(args): if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) - ## Check arguments. 
- - if args.num_replicas > 1 and args.prefix is None: - args.prefix = "rvb" - warnings.warn("--prefix is set to 'rvb' as --num-replications is larger than 1.") + ## Check arguments + if args.prefix is None: + if args.num_replicas > 1 or args.include_original_data: + args.prefix = "rvb" + warnings.warn("--prefix is set to 'rvb' as more than one copy of data is generated") if not args.num_replicas > 0: raise Exception("--num-replications cannot be non-positive") @@ -180,13 +182,18 @@ def WriteDictToFile(dict, file_name): # This function creates the utt2uniq file from the utterance id in utt2spk file -def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix): +def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, include_original, prefix): corrupted_utt2uniq = {} # Parse the utt2spk to get the utterance id utt2spk = ParseFileToDict(input_dir + "/utt2spk", value_processor = lambda x: " ".join(x)) keys = utt2spk.keys() keys.sort() - for i in range(1, num_replicas+1): + if include_original: + start_index = 0 + else: + start_index = 1 + + for i in range(start_index, num_replicas+1): for utt_id in keys: new_utt_id = GetNewId(utt_id, prefix, i) corrupted_utt2uniq[new_utt_id] = utt_id @@ -314,6 +321,7 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal foreground_snr_array, # the SNR for adding the foreground noises background_snr_array, # the SNR for adding the background noises num_replicas, # Number of replicate to generated for the data + include_original, # include a copy of the original data prefix, # prefix for the id of the corrupted utterances speech_rvb_probability, # Probability of reverberating a speech signal shift_output, # option whether to shift the output waveform @@ -326,7 +334,12 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal corrupted_wav_scp = {} keys = wav_scp.keys() keys.sort() - for i in range(1, num_replicas+1): + if include_original: + start_index = 0 + else: + start_index = 1 + + for i in range(start_index, num_replicas+1): for recording_id in keys: wav_original_pipe = wav_scp[recording_id] # check if it is really a pipe @@ -346,8 +359,9 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal speech_dur, # duration of the recording max_noises_recording # Maximum number of point-source noises that can be added ) - - if reverberate_opts == "": + + # prefix with index 0, e.g. 
rvb0_swb0035, stands for the original data
+        if reverberate_opts == "" or i == 0:
             wav_corrupted_pipe = "{0}".format(wav_original_pipe)
         else:
             wav_corrupted_pipe = "{0} wav-reverberate --shift-output={1} {2} - - |".format(wav_original_pipe, shift_output, reverberate_opts)

@@ -359,10 +373,15 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal


 # This function replicate the entries in files like segments, utt2spk, text
-def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]):
+def AddPrefixToFields(input_file, output_file, num_replicas, include_original, prefix, field = [0]):
     list = map(lambda x: x.strip(), open(input_file))
     f = open(output_file, "w")
-    for i in range(1, num_replicas+1):
+    if include_original:
+        start_index = 0
+    else:
+        start_index = 1
+
+    for i in range(start_index, num_replicas+1):
         for line in list:
             if len(line) > 0 and line[0] != ';':
                 split1 = line.split()
@@ -383,6 +402,7 @@ def CreateReverberatedCopy(input_dir,
                            foreground_snr_string, # the SNR for adding the foreground noises
                            background_snr_string, # the SNR for adding the background noises
                            num_replicas, # Number of replicate to generated for the data
+                           include_original, # include a copy of the original data
                            prefix, # prefix for the id of the corrupted utterances
                            speech_rvb_probability, # Probability of reverberating a speech signal
                            shift_output, # option whether to shift the output waveform
@@ -406,27 +426,26 @@ def CreateReverberatedCopy(input_dir,
     background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))

     GenerateReverberatedWavScp(wav_scp, durations, output_dir, room_dict, pointsource_noise_list, iso_noise_dict,
-                               foreground_snr_array, background_snr_array, num_replicas, prefix,
+                               foreground_snr_array, background_snr_array, num_replicas, include_original, prefix,
                                speech_rvb_probability, shift_output, isotropic_noise_addition_probability,
                                pointsource_noise_addition_probability, max_noises_per_minute)

-    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, prefix, field = [0,1])
+    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, include_original, prefix, field = [0,1])
     data_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
                     .format(output_dir = output_dir))

     if os.path.isfile(input_dir + "/utt2uniq"):
-        AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, prefix, field =[0])
+        AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, include_original, prefix, field =[0])
     else:
         # Create the utt2uniq file
-        CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix)
-
+        CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, include_original, prefix)

     if os.path.isfile(input_dir + "/text"):
-        AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, prefix, field =[0])
+        AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, include_original, prefix, field =[0])
     if os.path.isfile(input_dir + "/segments"):
-        AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, prefix, field = [0,1])
+        AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, include_original, prefix, field = [0,1])
     if os.path.isfile(input_dir + "/reco2file_and_channel"):
-        AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, prefix, field = [0,1])
+        AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, include_original, prefix, field = [0,1])
AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, include_original, prefix, field = [0,1]) data_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}" .format(output_dir = output_dir)) @@ -597,6 +616,7 @@ def ParseNoiseList(noise_set_para_array, smoothing_weight, sampling_rate = None) pointsource_noise_list += SmoothProbabilityDistribution(current_pointsource_noise_list, smoothing_weight, noise_set.probability) # ensure the point-source noise probabilities sum to 1 + pointsource_noise_list = SmoothProbabilityDistribution(pointsource_noise_list, smoothing_weight, 1.0) if len(pointsource_noise_list) > 0: assert almost_equal(sum(noise.probability for noise in pointsource_noise_list), 1.0) @@ -629,6 +649,7 @@ def Main(): foreground_snr_string = args.foreground_snr_string, background_snr_string = args.background_snr_string, num_replicas = args.num_replicas, + include_original = args.include_original_data, prefix = args.prefix, speech_rvb_probability = args.speech_rvb_probability, shift_output = args.shift_output, From dc13729030961636d2016bb639cbf1cad6dfcb3b Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Tue, 25 Oct 2016 22:19:35 -0400 Subject: [PATCH 4/9] Add coments and fix typo --- .../chain/multi_condition/run_tdnn_7b.sh | 29 +++++++++---------- .../multi_condition/run_ivector_common.sh | 28 +++++++----------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh index 19dd29eae16..5a2ccdc971c 100755 --- a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh +++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh @@ -4,14 +4,14 @@ set -e # configs for 'chain' affix= -stage=12 -train_stage=$1 +stage=1 +train_stage=-10 get_egs_stage=-10 speed_perturb=true dir=exp/chain/tdnn_7b # Note: _sp will get added to this if $speed_perturb == true. decode_iter= iv_dir=exp/nnet3_rvb -num_data_reps=1 +num_data_reps=1 # number of reverberated copies of data to generate clean_train_set=train_nodup # TDNN options @@ -59,21 +59,10 @@ fi # run those things. -# if we are using the speed-perturbed data we need to generate -# alignments for it. -# Also the data reverberation will be done in this script/ -echo local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \ - --clean-data-dir ${clean_train_set} \ - --iv-dir $iv_dir \ - --speed-perturb $speed_perturb \ - --num-data-reps $num_data_reps || exit 1; - - if [ "$speed_perturb" == "true" ]; then suffix=_sp fi -clean_train_set=${clean_train_set}${suffix} dir=${dir}${affix:+_$affix}${suffix}_rvb${num_data_reps} train_set=${clean_train_set}${suffix}_rvb${num_data_reps} lang=data/lang_chain_2y @@ -82,11 +71,21 @@ lat_dir=exp/tri4_lats_nodup${suffix} rvb_lat_dir=${lat_dir}_rvb${num_data_reps} +# if we are using the speed-perturbed data we need to generate +# alignments for it. +# Also the data reverberation will be done in this script/ +echo local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \ + --clean-data-dir ${clean_train_set} \ + --iv-dir $iv_dir \ + --speed-perturb $speed_perturb \ + --num-data-reps $num_data_reps || exit 1; + + if [ $stage -le 9 ]; then # Get the alignments as lattices (gives the CTC training more freedom). 
# use the same num-jobs as the alignments nj=$(cat exp/tri4_ali_nodup${suffix}/num_jobs) || exit 1; - steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/${clean_data_dir} \ + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/${clean_train_set}${suffix} \ data/lang exp/tri4 $lat_dir rm $lat_dir/fsts.*.gz # save space diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh index e99a7168c87..58c4b4d0b64 100755 --- a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh +++ b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh @@ -1,14 +1,12 @@ #!/bin/bash #set -e -# this script is based on local/nnet3/run_ivector_common.sh -# but it operates on corrupted training/dev/test data sets +# This script is based on local/nnet3/run_ivector_common.sh. +# It reverberates the original data with simulated room impulse responses . cmd.sh stage=1 -foreground_snrs="20:10:15:5:0" -background_snrs="20:10:15:5:0" -num_data_reps=1 +num_data_reps=1 # number of reverberated copies of data to generate clean_data_dir=train_nodup iv_dir=exp/nnet3_rvb speed_perturb=true @@ -58,27 +56,21 @@ fi if [ $stage -le 3 ]; then - if [ ! -d "RIRS_NOISES" ]; then - # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises - wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip - unzip rirs_noises.zip + if [ ! -d "simulated_rirs_8k" ]; then + # Download the simulated RIR package with 8k sampling rate + wget --no-check-certificate http://www.openslr.org/resources/26/sim_rir_8k.zip + unzip sim_rir_8k.zip fi # corrupt the data to generate reverberated data # this script modifies wav.scp to include the reverberation commands, the real computation will be done at the feature extraction python steps/data/reverberate_data_dir.py \ --prefix "rev" \ - --rir-set-parameters "0.25, RIRS_NOISES/simulated_rirs/smallroom/rir_list" \ - --rir-set-parameters "0.25, RIRS_NOISES/simulated_rirs/mediumroom/rir_list" \ - --rir-set-parameters "0.25, RIRS_NOISES/simulated_rirs/largeroom/rir_list" \ - --rir-set-parameters "0.25, RIRS_NOISES/real_rirs_isotropic_noises/rir_list" \ - --foreground-snrs $foreground_snrs \ - --background-snrs $background_snrs \ + --rir-set-parameters "0.3, simulated_rirs_8k/smallroom/rir_list" \ + --rir-set-parameters "0.3, simulated_rirs_8k/mediumroom/rir_list" \ + --rir-set-parameters "0.3, simulated_rirs_8k/largeroom/rir_list" \ --speech-rvb-probability 1 \ - --pointsource-noise-addition-probability 1 \ - --isotropic-noise-addition-probability 1 \ --num-replications $num_data_reps \ - --max-noises-per-minute 1 \ --source-sampling-rate 8000 \ --include-original-data true \ data/${clean_data_dir} data/${clean_data_dir}_rvb${num_data_reps} From 82cd6f70e68d20028c64e6d68376ecf8a50851e9 Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Thu, 27 Oct 2016 01:07:49 -0400 Subject: [PATCH 5/9] fix --include-original-data option in reverberate_data_dir.py --- egs/wsj/s5/steps/data/reverberate_data_dir.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 72679406213..017aedb05a3 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -70,7 +70,7 @@ def GetArgs(): help="Sampling rate of the source data. 
If a positive integer is specified with this option, " "the RIRs/noises will be resampled to the rate of the source data.") parser.add_argument("--include-original-data", type=str, help="If true, the output data includes one copy of the original data", - choices=['true', 'false'], default = False) + choices=['true', 'false'], default = "false") parser.add_argument("input_dir", help="Input data directory") parser.add_argument("output_dir", @@ -89,7 +89,7 @@ def CheckArgs(args): ## Check arguments if args.prefix is None: - if args.num_replicas > 1 or args.include_original_data: + if args.num_replicas > 1 or args.include_original_data == "true": args.prefix = "rvb" warnings.warn("--prefix is set to 'rvb' as more than one copy of data is generated") @@ -641,6 +641,11 @@ def Main(): print("Number of isotropic noises is {0}".format(sum(len(iso_noise_dict[key]) for key in iso_noise_dict.keys()))) room_dict = MakeRoomDict(rir_list) + if args.include_original_data == "true": + include_original = True + else: + include_original = False + CreateReverberatedCopy(input_dir = args.input_dir, output_dir = args.output_dir, room_dict = room_dict, @@ -649,7 +654,7 @@ def Main(): foreground_snr_string = args.foreground_snr_string, background_snr_string = args.background_snr_string, num_replicas = args.num_replicas, - include_original = args.include_original_data, + include_original = include_original, prefix = args.prefix, speech_rvb_probability = args.speech_rvb_probability, shift_output = args.shift_output, From a4ee796b480abe2c924a6dd7d833375cf995f38d Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Mon, 31 Oct 2016 23:43:42 -0400 Subject: [PATCH 6/9] adding run_tdnn_7g.sh which is the current best chain result --- egs/swbd/s5c/RESULTS | 31 +-- egs/swbd/s5c/local/chain/run_tdnn.sh | 2 +- .../s5c/local/chain/tuning/run_tdnn_7f.sh | 210 ++++++++++++++++++ .../run_tdnn_7b.sh => tuning/run_tdnn_7g.sh} | 123 +++++----- .../multi_condition/run_ivector_common.sh | 29 +-- 5 files changed, 312 insertions(+), 83 deletions(-) create mode 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh rename egs/swbd/s5c/local/chain/{multi_condition/run_tdnn_7b.sh => tuning/run_tdnn_7g.sh} (65%) diff --git a/egs/swbd/s5c/RESULTS b/egs/swbd/s5c/RESULTS index e5bc3737c66..f103200f966 100644 --- a/egs/swbd/s5c/RESULTS +++ b/egs/swbd/s5c/RESULTS @@ -152,13 +152,23 @@ exit 0 %WER 19.4 | 2628 21594 | 82.7 12.0 5.3 2.1 19.4 54.9 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys %WER 20.8 | 2628 21594 | 81.3 13.1 5.6 2.2 20.8 56.9 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys -# bidirectional LSTM with the same configuration as the above experiment, plus self-repair of all nonliearities and clipgradient activated -%WER 10.4 | 1831 21395 | 90.5 6.2 3.3 0.9 10.4 44.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys -%WER 11.3 | 1831 21395 | 89.8 6.8 3.3 1.1 11.3 46.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys +# bidirectional LSTM with the same configuration as the above experiment, with self-repair of all nonliearities and clipgradient, and max-change-per-component activated +%WER 14.9 | 4459 42989 | 86.7 9.0 4.3 1.6 14.9 50.5 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 15.9 | 4459 42989 | 85.7 9.8 4.5 1.7 15.9 52.3 | 
exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 10.2 | 1831 21395 | 90.8 6.1 3.2 1.0 10.2 44.4 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 11.2 | 1831 21395 | 89.9 6.8 3.3 1.1 11.2 46.6 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 19.4 | 2628 21594 | 82.7 11.8 5.4 2.2 19.4 54.5 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys +%WER 20.6 | 2628 21594 | 81.5 12.8 5.7 2.2 20.6 56.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.5/eval2000_hires.ctm.callhm.filt.sys + +( +# bidirectional LSTM with the same configuration as the above experiment, with self-repair of all nonliearities and clipgradient activated %WER 15.0 | 4459 42989 | 86.5 9.1 4.5 1.5 15.0 50.4 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.filt.sys %WER 16.0 | 4459 42989 | 85.6 9.9 4.5 1.6 16.0 52.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 10.4 | 1831 21395 | 90.5 6.2 3.3 0.9 10.4 44.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 11.3 | 1831 21395 | 89.8 6.8 3.3 1.1 11.3 46.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys %WER 19.6 | 2628 21594 | 82.5 12.1 5.5 2.1 19.6 54.8 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys %WER 20.7 | 2628 21594 | 81.4 12.9 5.7 2.2 20.7 56.8 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_sw1_tg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys +) # results with nnet3 tdnn: local/nnet3/run_tdnn.sh (11.10.2015) (2 epoch training on speed-perturbed and volume perturbed data) %WER 12.1 | 1831 21395 | 89.1 7.1 3.8 1.3 12.1 48.1 | exp/nnet3/tdnn_sp/decode_eval2000_hires_sw1_fsh_fg/score_12_0.0/eval2000_hires.ctm.swbd.filt.sys @@ -181,16 +191,11 @@ exit 0 %WER 24.3 | 2628 21594 | 78.6 15.0 6.4 2.9 24.3 60.0 | exp/nnet3/tdnn_cnn_sp/decode_eval2000_hires_sw1_tg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys -# current best 'chain' models with TDNNs (see local/chain/run_tdnn_7d.sh) -%WER 10.4 | 1831 21395 | 90.7 6.1 3.2 1.2 10.4 44.6 | exp/chain/tdnn_7d_sp/decode_eval2000_sw1_fsh_fg/score_11_1.0/eval2000_hires.ctm.swbd.filt.sys -%WER 11.6 | 1831 21395 | 89.7 7.0 3.3 1.4 11.6 47.0 | exp/chain/tdnn_7d_sp/decode_eval2000_sw1_tg/score_10_1.0/eval2000_hires.ctm.swbd.filt.sys - - -# results with chain TDNNs (2 epoch training on data being speed-perturbed, volume-perturbed and reverberated with room impulse responses) (see local/chain/multi_condition/run_tdnn_7b.sh) -%WER 15.0 | 4459 42989 | 86.5 8.8 4.7 1.6 15.0 50.7 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.5/eval2000_hires.ctm.filt.sys -%WER 10.0 | 1831 21395 | 91.0 6.0 3.0 1.1 10.0 43.8 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/score_10_0.5/eval2000_hires.ctm.swbd.filt.sys -%WER 20.0 | 2628 21594 | 82.1 11.7 6.2 2.1 20.0 55.6 | exp/chain/tdnn_7b_sp_rvb1_mix/decode_eval2000_sw1_fsh_fg/sddcore_10_0.0/eval2000_hires.ctm.callhm.filt.sys - +# current best 'chain' models with TDNNs (see local/chain/run_tdnn_7g.sh) +# (2 epoch training on data being speed-perturbed, volume-perturbed and reverberated with room impulse responses) +%WER 14.6 | 4459 42989 | 87.1 8.7 4.2 1.7 
14.6 50.7 | exp/chain/tdnn_7g_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 9.8 | 1831 21395 | 91.2 5.7 3.1 1.1 9.8 43.4 | exp/chain/tdnn_7g_sp/decode_eval2000_sw1_fsh_fg/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 19.3 | 2628 21594 | 83.0 11.5 5.5 2.3 19.3 55.8 | exp/chain/tdnn_7g_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys # current best 'chain' models with LSTM (see local/chain/run_lstm_d.sh) %WER 15.9 | 4459 42989 | 86.0 9.6 4.3 2.0 15.9 51.7 | exp/chain/lstm_d_ld5_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.filt.sys diff --git a/egs/swbd/s5c/local/chain/run_tdnn.sh b/egs/swbd/s5c/local/chain/run_tdnn.sh index fd753c6faa5..4b80e886c66 120000 --- a/egs/swbd/s5c/local/chain/run_tdnn.sh +++ b/egs/swbd/s5c/local/chain/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_7e.sh \ No newline at end of file +tuning/run_tdnn_7g.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh new file mode 100755 index 00000000000..5c47da1024f --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +# 7e is as 7f, but adding the max-change-per-component to the neural net training +# which affects results slightly +# local/chain/compare_wer.sh 7e 7f +# System 7e 7f +# WER on train_dev(tg) 14.41 14.46 +# WER on train_dev(fg) 13.39 13.23 +# WER on eval2000(tg) 16.9 17.0 +# WER on eval2000(fg) 15.3 15.4 +# Final train prob -0.0853629 -0.0882071 +# Final valid prob -0.110972 -0.107545 +# Final train prob (xent) -1.25237 -1.26246 +# Final valid prob (xent) -1.36715 -1.35525 + + +set -e + +# configs for 'chain' +affix= +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_7f # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= + +# TDNN options +# this script uses the new tdnn config generator so it needs a final 0 to reflect that the final layer input has no splicing +# smoothing options +self_repair_scale=0.00001 +# training options +num_epochs=4 +initial_effective_lrate=0.001 +final_effective_lrate=0.0001 +leftmost_questions_truncate=-1 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=3 +num_jobs_final=16 +minibatch_size=128 +relu_dim=625 +frames_per_eg=150 +remove_egs=false +common_egs_dir= +xent_regularize=0.1 + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs"; + if [ ! 
-z "$relu_dim" ]; then + dim_opts="--relu-dim $relu_dim" + else + dim_opts="--pnorm-input-dim $pnorm_input_dim --pnorm-output-dim $pnorm_output_dim" + fi + + # create the config files for nnet initialization + repair_opts=${self_repair_scale:+" --self-repair-scale-nonlinearity $self_repair_scale "} + + steps/nnet3/tdnn/make_configs.py \ + $repair_opts \ + --feat-dir data/${train_set}_hires \ + --ivector-dir exp/nnet3/ivectors_${train_set} \ + --tree-dir $treedir \ + $dim_opts \ + --splice-indexes "-1,0,1 -1,0,1 -1,0,1 -3,0,3 -3,0,3 -6,0,6 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize $xent_regularize \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target $final_layer_normalize_target \ + $dir/configs || exit 1; +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; + +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + iter_opts= + if [ ! 
+
+decode_suff=sw1_tg
+graph_dir=$dir/graph_sw1_tg
+if [ $stage -le 15 ]; then
+  iter_opts=
+  if [ ! -z $decode_iter ]; then
+    iter_opts=" --iter $decode_iter "
+  fi
+  for decode_set in train_dev eval2000; do
+      (
+      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
+          --nj 50 --cmd "$decode_cmd" $iter_opts \
+          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
+          $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_${decode_suff} || exit 1;
+      if $has_fisher; then
+          steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
+            data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
+            $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1;
+      fi
+      ) &
+  done
+fi
+wait;
+exit 0;
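Review note: the decoding stage uses the "( ... ) & ... wait" idiom, so the test sets decode in parallel and the script only blocks at the end; an exit 1 inside a backgrounded subshell does not abort the outer script. A minimal sketch of the pattern:

  for s in train_dev eval2000; do
    ( echo "decoding $s ..."; sleep 1; echo "$s done" ) &
  done
  wait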
diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
similarity index 65%
rename from egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh
rename to egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
index 5a2ccdc971c..2650e06fe5d 100755
--- a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7b.sh
+++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
@@ -1,5 +1,22 @@
 #!/bin/bash
 
+# 7g is as 7f, but reverberating the training data with room impulse responses
+# which leads to better results
+# This script assumes a mixing of the original training data with its reverberated copy
+# and results in a 2-fold training set. Thus the number of epochs is halved to
+# keep the same training time.
+# local/chain/compare_wer.sh 7f 7g
+# System                      7f          7g
+# WER on train_dev(tg)        14.46       14.27
+# WER on train_dev(fg)        13.23       13.16
+# WER on eval2000(tg)         17.0        16.3
+# WER on eval2000(fg)         15.4        14.6
+# Final train prob            -0.0882071  -0.123325
+# Final valid prob            -0.107545   -0.131798
+# Final train prob (xent)     -1.26246    -1.6196
+# Final valid prob (xent)     -1.35525    -1.60244
+
+
 set -e
 
 # configs for 'chain'
@@ -8,18 +25,16 @@ stage=1
 train_stage=-10
 get_egs_stage=-10
 speed_perturb=true
-dir=exp/chain/tdnn_7b # Note: _sp will get added to this if $speed_perturb == true.
+dir=exp/chain/tdnn_7g # Note: _sp will get added to this if $speed_perturb == true.
 decode_iter=
-iv_dir=exp/nnet3_rvb
+ivector_dir=exp/nnet3_rvb
 num_data_reps=1 # number of reverberated copies of data to generate
-clean_train_set=train_nodup
+input_train_set=train_nodup
+
 # TDNN options
 # this script uses the new tdnn config generator so it needs a final 0 to reflect that the final layer input has no splicing
 # smoothing options
-pool_window=
-pool_type='none'
-pool_lpfilter_width=
 self_repair_scale=0.00001
 # training options
 num_epochs=2
@@ -33,16 +48,14 @@ num_jobs_final=16
 minibatch_size=128
 relu_dim=625
 frames_per_eg=150
-remove_egs=true
+remove_egs=false
 common_egs_dir=
 xent_regularize=0.1
-
-
 # End configuration section.
 echo "$0 $@" # Print the command line for logging
 
-. cmd.sh
+. ./cmd.sh
 . ./path.sh
 . ./utils/parse_options.sh
 
@@ -58,25 +71,27 @@ fi
 # nnet3 setup, and you can skip them by setting "--stage 8" if you have already
 # run those things.
-
+suffix=
 if [ "$speed_perturb" == "true" ]; then
   suffix=_sp
 fi
 
-dir=${dir}${affix:+_$affix}${suffix}_rvb${num_data_reps}
-train_set=${clean_train_set}${suffix}_rvb${num_data_reps}
+dir=${dir}${affix:+_$affix}$suffix
+clean_train_set=${input_train_set}${suffix}
+train_set=${clean_train_set}_rvb${num_data_reps}
+ali_dir=exp/tri4_ali_nodup$suffix
+treedir=exp/chain/tri5_7d_tree$suffix
 lang=data/lang_chain_2y
-treedir=exp/chain/tri5_2y_tree${suffix}
-lat_dir=exp/tri4_lats_nodup${suffix}
-rvb_lat_dir=${lat_dir}_rvb${num_data_reps}
+clean_lat_dir=exp/tri4_lats_nodup${suffix}
+lat_dir=${clean_lat_dir}_rvb${num_data_reps}
 
 # if we are using the speed-perturbed data we need to generate
 # alignments for it.
-# Also the data reverberation will be done in this script/
-echo local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \
-  --clean-data-dir ${clean_train_set} \
-  --iv-dir $iv_dir \
+# The data reverberation will be done in this script.
+local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \
+  --input-data-dir ${input_train_set} \
+  --ivector-dir $ivector_dir \
   --speed-perturb $speed_perturb \
   --num-data-reps $num_data_reps || exit 1;
 
@@ -85,30 +100,30 @@ if [ $stage -le 9 ]; then
   # Get the alignments as lattices (gives the CTC training more freedom).
   # use the same num-jobs as the alignments
   nj=$(cat exp/tri4_ali_nodup${suffix}/num_jobs) || exit 1;
-  steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/${clean_train_set}${suffix} \
-    data/lang exp/tri4 $lat_dir
-  rm $lat_dir/fsts.*.gz # save space
+  steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/${clean_train_set} \
+    data/lang exp/tri4 $clean_lat_dir
+  rm $clean_lat_dir/fsts.*.gz # save space
 
   # Create the lattices for the reverberated data
-  mkdir -p $rvb_lat_dir/temp/
-  lattice-copy "ark:gunzip -c $lat_dir/lat.*.gz |" ark,scp:$rvb_lat_dir/temp/lats.ark,$rvb_lat_dir/temp/lats.scp
+  mkdir -p $lat_dir/temp/
+  lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp
 
   # copy the lattices for the reverberated data
-  rm -f $rvb_lat_dir/temp/combined_lats.scp
-  touch $rvb_lat_dir/temp/combined_lats.scp
+  rm -f $lat_dir/temp/combined_lats.scp
+  touch $lat_dir/temp/combined_lats.scp
   # Here prefix "rev0_" represents the clean set, "rev1_" represents the reverberated set
   for i in `seq 0 $num_data_reps`; do
-    cat $rvb_lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $rvb_lat_dir/temp/combined_lats.scp
+    cat $lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp
   done
-  sort -u $rvb_lat_dir/temp/combined_lats.scp > $rvb_lat_dir/temp/combined_lats_sorted.scp
+  sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp
 
-  lattice-copy scp:$rvb_lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$rvb_lat_dir/lat.1.gz" || exit 1;
-  echo "1" > $rvb_lat_dir/num_jobs
+  lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1;
+  echo "1" > $lat_dir/num_jobs
 
   # copy other files from original lattice dir
   for f in cmvn_opts final.mdl splice_opts tree; do
-    cp $lat_dir/$f $rvb_lat_dir/$f
+    cp $clean_lat_dir/$f $lat_dir/$f
   done
 fi
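Review note: stage 9 above never realigns the reverberated audio; it reuses the clean-data lattices for every copy by rewriting the scp keys, which works because reverberate_data_dir.py keeps utterance ids and only adds a rev<N>_ prefix. A minimal illustration of the key rewriting (the utterance id and archive offset are made up):

  for i in $(seq 0 1); do
    echo "sw02001-A_000098-001156 lats.ark:12345" | sed -e "s/^/rev${i}_/"
  done
  # -> rev0_sw02001-A_000098-001156 lats.ark:12345   (original data)
  # -> rev1_sw02001-A_000098-001156 lats.ark:12345   (reverberated copy, same lattice)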
@@ -127,13 +142,12 @@ if [ $stage -le 10 ]; then
 fi
 
 if [ $stage -le 11 ]; then
-  # Build a tree using our new topology.
-  # we build the tree using clean features (data/train) rather than
-  # the augmented features (data/train_rvb) to get better alignments
-
+  # Build a tree using our new topology. This is the critically different
+  # step compared with other recipes.
   steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
       --leftmost-questions-truncate $leftmost_questions_truncate \
-      --cmd "$train_cmd" 9000 data/train_nodup${suffix} $lang exp/tri4_ali_nodup${suffix} $treedir
+      --context-opts "--context-width=2 --central-position=1" \
+      --cmd "$train_cmd" 7000 data/${clean_train_set} $lang $ali_dir $treedir
 fi
 
 if [ $stage -le 12 ]; then
@@ -145,19 +159,15 @@ if [ $stage -le 12 ]; then
   fi
 
   # create the config files for nnet initialization
-  pool_opts=
-  pool_opts=$pool_opts${pool_type:+" --pool-type $pool_type "}
-  pool_opts=$pool_opts${pool_window:+" --pool-window $pool_window "}
-  pool_opts=$pool_opts${pool_lpfilter_width:+" --pool-lpfilter-width $pool_lpfilter_width "}
-  repair_opts=${self_repair_scale:+" --self-repair-scale $self_repair_scale "}
+  repair_opts=${self_repair_scale:+" --self-repair-scale-nonlinearity $self_repair_scale "}
 
-  steps/nnet3/tdnn/make_configs.py $pool_opts \
+  steps/nnet3/tdnn/make_configs.py \
     $repair_opts \
     --feat-dir data/${train_set}_hires \
-    --ivector-dir $iv_dir/ivectors_${train_set} \
+    --ivector-dir $ivector_dir/ivectors_${train_set} \
     --tree-dir $treedir \
     $dim_opts \
-    --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \
+    --splice-indexes "-1,0,1 -1,0,1 -1,0,1 -3,0,3 -3,0,3 -6,0,6 0" \
     --use-presoftmax-prior-scale false \
     --xent-regularize $xent_regularize \
     --xent-separate-forward-affine true \
@@ -167,23 +177,23 @@ if [ $stage -le 12 ]; then
 fi
 
 
+
 if [ $stage -le 13 ]; then
   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
     utils/create_split_dir.pl \
       /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
   fi
 
-  touch $dir/egs/.nodelete # keep egs around when that run dies.
-
-  steps/nnet3/chain/train.py --stage $train_stage \
+  steps/nnet3/chain/train.py --stage $train_stage \
     --cmd "$decode_cmd" \
-    --feat.online-ivector-dir $iv_dir/ivectors_${train_set} \
+    --feat.online-ivector-dir $ivector_dir/ivectors_${train_set} \
     --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
     --chain.xent-regularize $xent_regularize \
     --chain.leaky-hmm-coefficient 0.1 \
     --chain.l2-regularize 0.00005 \
     --chain.apply-deriv-weights false \
     --chain.lm-opts="--num-extra-lm-states=2000" \
+    --egs.dir "$common_egs_dir" \
     --egs.stage $get_egs_stage \
     --egs.opts "--frames-overlap-per-eg 0" \
     --egs.chunk-width $frames_per_eg \
@@ -198,29 +208,30 @@ if [ $stage -le 13 ]; then
     --cleanup.remove-egs $remove_egs \
     --feat-dir data/${train_set}_hires \
     --tree-dir $treedir \
-    --lat-dir $rvb_lat_dir \
+    --lat-dir $lat_dir \
     --dir $dir || exit 1;
+
 fi
 
-if [ $stage -le 13 ]; then
+if [ $stage -le 14 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg
+  utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg
 fi
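Review note: as in the other chain recipes, the graph is built with --self-loop-scale 1.0 and decoding runs with --acwt 1.0 --post-decode-acwt 10.0, which rescales the output lattices so the usual integer language-model weights still apply; that is why the RESULTS entries earlier in this series sit in score_<LMWT>_<penalty> directories such as score_10_0.0.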
 decode_suff=sw1_tg
 graph_dir=$dir/graph_sw1_tg
-if [ $stage -le 14 ]; then
+if [ $stage -le 15 ]; then
   iter_opts=
   if [ ! -z $decode_iter ]; then
     iter_opts=" --iter $decode_iter "
   fi
-  for decode_set in eval2000; do
+  for decode_set in train_dev eval2000; do
       (
       steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
-          --nj 30 --cmd "$decode_cmd" $iter_opts \
-          --online-ivector-dir $iv_dir/ivectors_${decode_set} \
+          --nj 50 --cmd "$decode_cmd" $iter_opts \
+          --online-ivector-dir $ivector_dir/ivectors_${decode_set} \
           $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_${decode_suff} || exit 1;
       if $has_fisher; then
           steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
index 58c4b4d0b64..6543b2b0366 100755
--- a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
+++ b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
@@ -7,8 +7,8 @@
 stage=1
 num_data_reps=1  # number of reverberated copies of data to generate
-clean_data_dir=train_nodup
-iv_dir=exp/nnet3_rvb
+input_data_dir=train_nodup
+ivector_dir=exp/nnet3_rvb
 speed_perturb=true
 
 set -e
@@ -16,15 +16,15 @@ set -e
 . ./path.sh
 . ./utils/parse_options.sh
 
-mkdir -p $iv_dir
+mkdir -p $ivector_dir
 
 if [ "$speed_perturb" == "true" ]; then
   # perturbed data preparation
-  if [ $stage -le 1 ] && [ ! -d data/${clean_data_dir}_sp ]; then
+  if [ $stage -le 1 ] && [ ! -d data/${input_data_dir}_sp ]; then
     #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment
     # _sp stands for speed-perturbed
-    for datadir in ${clean_data_dir}; do
+    for datadir in ${input_data_dir}; do
       utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
       utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
       utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
@@ -48,10 +48,12 @@ if [ "$speed_perturb" == "true" ]; then
   if [ $stage -le 2 ]; then
     #obtain the alignment of the perturbed data
     steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
-      data/${clean_data_dir}_sp data/lang_nosp exp/tri4 exp/tri4_ali_nodup_sp || exit 1
+      data/${input_data_dir}_sp data/lang_nosp exp/tri4 exp/tri4_ali_nodup_sp || exit 1
   fi
 
-  clean_data_dir=${clean_data_dir}_sp
+  clean_data_dir=${input_data_dir}_sp
+else
+  clean_data_dir=${input_data_dir}
 fi
 
@@ -64,6 +66,7 @@ if [ $stage -le 3 ]; then
 
   # corrupt the data to generate reverberated data
   # this script modifies wav.scp to include the reverberation commands, the real computation will be done at the feature extraction
+  # if --include-original-data is true, the original data will be mixed with its reverberated copies
   python steps/data/reverberate_data_dir.py \
     --prefix "rev" \
     --rir-set-parameters "0.3, simulated_rirs_8k/smallroom/rir_list" \
@@ -104,7 +107,7 @@ if [ $stage -le 5 ]; then
   steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
     --splice-opts "--left-context=3 --right-context=3" \
     5500 90000 data/train_100k_nodup_hires \
-    data/lang_nosp exp/tri2_ali_100k_nodup $iv_dir/tri3b
+    data/lang_nosp exp/tri2_ali_100k_nodup $ivector_dir/tri3b
 fi
 
 train_set=${clean_data_dir}_rvb${num_data_reps}
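Review note: with the defaults here (speed_perturb=true, num_data_reps=1) the directory names resolve as below, which is where the _sp_rvb1 suffixes elsewhere in this series come from; a sketch:

  input_data_dir=train_nodup
  clean_data_dir=${input_data_dir}_sp        # -> train_nodup_sp
  train_set=${clean_data_dir}_rvb1           # -> train_nodup_sp_rvb1 (clean + 1 reverberated copy)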
@@ -113,7 +116,7 @@ if [ $stage -le 6 ]; then
   # To train a diagonal UBM we don't need very much data, so use the smallest subset.
   utils/subset_data_dir.sh data/${train_set}_hires 30000 data/${train_set}_30k_hires
   steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
-    data/${train_set}_30k_hires 512 $iv_dir/tri3b $iv_dir/diag_ubm
+    data/${train_set}_30k_hires 512 $ivector_dir/tri3b $ivector_dir/diag_ubm
 fi
 
 if [ $stage -le 7 ]; then
@@ -122,7 +125,7 @@ if [ $stage -le 7 ]; then
   # 100k subset (just under half the data).
   utils/subset_data_dir.sh data/${train_set}_hires 100000 data/${train_set}_100k_hires
   steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
-    data/${train_set}_100k_hires $iv_dir/diag_ubm $iv_dir/extractor || exit 1;
+    data/${train_set}_100k_hires $ivector_dir/diag_ubm $ivector_dir/extractor || exit 1;
 fi
 
 if [ $stage -le 8 ]; then
@@ -133,11 +136,11 @@ if [ $stage -le 8 ]; then
   steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires
 
   steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
-    data/${train_set}_max2_hires $iv_dir/extractor $iv_dir/ivectors_${train_set} || exit 1;
+    data/${train_set}_max2_hires $ivector_dir/extractor $ivector_dir/ivectors_${train_set} || exit 1;
 
-  for data_set in eval2000; do
+  for data_set in train_dev eval2000; do
     steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
-      data/${data_set}_hires $iv_dir/extractor $iv_dir/ivectors_$data_set || exit 1;
+      data/${data_set}_hires $ivector_dir/extractor $ivector_dir/ivectors_$data_set || exit 1;
   done
 fi

From 67673fa40f06364aea3f33e44b3f979e1193185c Mon Sep 17 00:00:00 2001
From: Tom Ko
Date: Fri, 4 Nov 2016 12:19:08 -0400
Subject: [PATCH 7/9] adding more comments to the script

---
 .../s5c/local/chain/tuning/run_tdnn_7f.sh     |  7 ++--
 .../s5c/local/chain/tuning/run_tdnn_7g.sh     | 14 ++++---
 .../multi_condition/run_ivector_common.sh     | 40 ++++++++-----------
 egs/wsj/s5/steps/data/reverberate_data_dir.py |  2 +-
 4 files changed, 30 insertions(+), 33 deletions(-)
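Review note on this commit: besides rewording comments (CTC -> LF-MMI), it hoists the splice string into a splice_indexes variable in 7f/7g. Since the variable is defined before utils/parse_options.sh is sourced, it becomes overridable from the command line; a hypothetical invocation:

  local/chain/tuning/run_tdnn_7f.sh --splice-indexes "-1,0,1 -2,0,2 -3,0,3 0"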
diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh
index 5c47da1024f..256373fc698 100755
--- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh
+++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# 7e is as 7f, but adding the max-change-per-component to the neural net training
+# 7f is as 7e, but adding the max-change-per-component to the neural net training
 # which affects results slightly
 # local/chain/compare_wer.sh 7e 7f
 # System                      7e          7f
@@ -27,6 +27,7 @@ decode_iter=
 
 # TDNN options
 # this script uses the new tdnn config generator so it needs a final 0 to reflect that the final layer input has no splicing
+splice_indexes="-1,0,1 -1,0,1 -1,0,1 -3,0,3 -3,0,3 -6,0,6 0"
 # smoothing options
 self_repair_scale=0.00001
 # training options
@@ -84,7 +85,7 @@ local/nnet3/run_ivector_common.sh --stage $stage \
 
 if [ $stage -le 9 ]; then
-  # Get the alignments as lattices (gives the CTC training more freedom).
+  # Get the alignments as lattices (gives the LF-MMI training more freedom).
   # use the same num-jobs as the alignments
   nj=$(cat exp/tri4_ali_nodup$suffix/num_jobs) || exit 1;
   steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/$train_set \
@@ -132,7 +133,7 @@ if [ $stage -le 12 ]; then
     --ivector-dir exp/nnet3/ivectors_${train_set} \
     --tree-dir $treedir \
     $dim_opts \
-    --splice-indexes "-1,0,1 -1,0,1 -1,0,1 -3,0,3 -3,0,3 -6,0,6 0" \
+    --splice-indexes "$splice_indexes" \
     --use-presoftmax-prior-scale false \
     --xent-regularize $xent_regularize \
     --xent-separate-forward-affine true \
diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
index 2650e06fe5d..cf1343e5041 100755
--- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
+++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
@@ -4,7 +4,8 @@
 # which leads to better results
 # This script assumes a mixing of the original training data with its reverberated copy
 # and results in a 2-fold training set. Thus the number of epochs is halved to
-# keep the same training time.
+# keep the same training time. The model converges after 2 epochs of training;
+# the WER doesn't change much with more epochs of training.
 # local/chain/compare_wer.sh 7f 7g
 # System                      7f          7g
@@ -34,6 +35,7 @@ input_train_set=train_nodup
 
 # TDNN options
 # this script uses the new tdnn config generator so it needs a final 0 to reflect that the final layer input has no splicing
+splice_indexes="-1,0,1 -1,0,1 -1,0,1 -3,0,3 -3,0,3 -6,0,6 0"
 # smoothing options
 self_repair_scale=0.00001
 # training options
@@ -86,8 +88,6 @@ clean_lat_dir=exp/tri4_lats_nodup${suffix}
 lat_dir=${clean_lat_dir}_rvb${num_data_reps}
 
-# if we are using the speed-perturbed data we need to generate
-# alignments for it.
 # The data reverberation will be done in this script.
 local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \
   --input-data-dir ${input_train_set} \
@@ -97,7 +97,7 @@ local/nnet3/multi_condition/run_ivector_common.sh --stage $stage \
 
 if [ $stage -le 9 ]; then
-  # Get the alignments as lattices (gives the CTC training more freedom).
+  # Get the alignments as lattices (gives the LF-MMI training more freedom).
   # use the same num-jobs as the alignments
   nj=$(cat exp/tri4_ali_nodup${suffix}/num_jobs) || exit 1;
   steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/${clean_train_set} \
@@ -106,6 +106,7 @@ if [ $stage -le 9 ]; then
 
   # Create the lattices for the reverberated data
+  # We use the lattices/alignments from the clean data for the reverberated data.
   mkdir -p $lat_dir/temp/
   lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp
 
@@ -144,6 +145,7 @@ fi
 if [ $stage -le 11 ]; then
   # Build a tree using our new topology. This is the critically different
   # step compared with other recipes.
+  # we build the tree using the clean alignments, as we empirically found that this was better.
   steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
       --leftmost-questions-truncate $leftmost_questions_truncate \
       --context-opts "--context-width=2 --central-position=1" \
@@ -167,7 +169,7 @@ if [ $stage -le 12 ]; then
     --ivector-dir $ivector_dir/ivectors_${train_set} \
     --tree-dir $treedir \
     $dim_opts \
-    --splice-indexes "-1,0,1 -1,0,1 -1,0,1 -3,0,3 -3,0,3 -6,0,6 0" \
+    --splice-indexes "$splice_indexes" \
     --use-presoftmax-prior-scale false \
     --xent-regularize $xent_regularize \
     --xent-separate-forward-affine true \
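Review note: the epoch accounting in the new comment works out as follows: with num_data_reps=1 the mixed set is twice the clean set, so 2 epochs over 2x the data sees roughly the same number of frames as the 4 epochs that tuning/run_tdnn_7f.sh (num_epochs=4) spends on the clean set alone.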
@@ -181,7 +183,7 @@ fi
 
 if [ $stage -le 13 ]; then
   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
     utils/create_split_dir.pl \
-      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
+      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-reverb-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
   fi
 
   steps/nnet3/chain/train.py --stage $train_stage \
diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
index 6543b2b0366..5f67e40d0f1 100755
--- a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
+++ b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
@@ -3,45 +3,38 @@
 # This script is based on local/nnet3/run_ivector_common.sh.
 # It reverberates the original data with simulated room impulse responses
 
-. cmd.sh
+. ./cmd.sh
 
-stage=1
+stage=3
 num_data_reps=1  # number of reverberated copies of data to generate
+                 # These will be combined with the original data.
 input_data_dir=train_nodup
 ivector_dir=exp/nnet3_rvb
 speed_perturb=true
 
 set -e
-. cmd.sh
+. ./cmd.sh
 . ./path.sh
 . ./utils/parse_options.sh
 
 mkdir -p $ivector_dir
 
+# Here we recommend speed perturbation as the gains are significant.
+# The gain from speed perturbation is additive with the gain from data reverberation.
 if [ "$speed_perturb" == "true" ]; then
   # perturbed data preparation
-  if [ $stage -le 1 ] && [ ! -d data/${input_data_dir}_sp ]; then
-    #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment
+  if [ $stage -le 1 ] && [ ! -f data/${input_data_dir}_sp/feats.scp ]; then
+    # Although the nnet will be trained by high resolution data, we still have to prepare normal-resolution MFCC
+    # for purposes of getting alignments and/or lattices on the speed-perturbed data.
     # _sp stands for speed-perturbed
-    for datadir in ${input_data_dir}; do
-      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
-      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
-      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
-      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
-      rm -r data/temp1 data/temp2
-
-      mfccdir=mfcc_perturbed
-      steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
-        data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
-      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
-      utils/fix_data_dir.sh data/${datadir}_tmp
-
-      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
-      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
-      utils/fix_data_dir.sh data/${datadir}_sp
-      rm -r data/temp0 data/${datadir}_tmp
-    done
+    echo "$0: preparing directory for speed-perturbed data"
+    utils/data/perturb_data_dir_speed_3way.sh data/${input_data_dir} data/${input_data_dir}_sp
+    mfccdir=mfcc_perturbed
+    steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
+      data/${input_data_dir}_sp exp/make_mfcc/${input_data_dir}_sp $mfccdir || exit 1;
+    steps/compute_cmvn_stats.sh data/${input_data_dir}_sp exp/make_mfcc/${input_data_dir}_sp $mfccdir || exit 1;
+    utils/fix_data_dir.sh data/${input_data_dir}_sp
   fi
 
@@ -66,6 +59,7 @@ if [ $stage -le 3 ]; then
 
   # corrupt the data to generate reverberated data
   # this script modifies wav.scp to include the reverberation commands, the real computation will be done at the feature extraction
+  # The script will automatically normalize the probability mass of the rir sets, so the user just needs to input the ratio of the sets
   # if --include-original-data is true, the original data will be mixed with its reverberated copies
   python steps/data/reverberate_data_dir.py \
     --prefix "rev" \
diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index 017aedb05a3..0083efa4939 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -360,7 +360,7 @@ def GenerateReverberatedWavScp(wav_scp,  # a dictionary whose values are the Kal
                                max_noises_recording  # Maximum number of point-source noises that can be added
                                ):
-        # prefix with index 0, e.g. rvb0_swb0035, stangs for the original data
+        # the prefix with index 0 is reserved for the original data, e.g. rvb0_swb0035 corresponds to the swb0035 recording in the original data
         if reverberate_opts == "" or i == 0:
             wav_corrupted_pipe = "{0}".format(wav_original_pipe)
         else:

From 823bcac416a6c19f73038b0f448aa14d4db85ccf Mon Sep 17 00:00:00 2001
From: Tom Ko
Date: Mon, 7 Nov 2016 10:49:39 -0500
Subject: [PATCH 8/9] fixing typo

---
 .../s5c/local/nnet3/multi_condition/run_ivector_common.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
index 5f67e40d0f1..b5acdd27a3c 100755
--- a/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
+++ b/egs/swbd/s5c/local/nnet3/multi_condition/run_ivector_common.sh
@@ -5,7 +5,7 @@
 
 . ./cmd.sh
 
-stage=3
+stage=1
 num_data_reps=1  # number of reverberated copies of data to generate
                  # These will be combined with the original data.
 input_data_dir=train_nodup
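Review note on the speed-perturbation rewrite in PATCH 7/9 above: utils/data/perturb_data_dir_speed_3way.sh bundles what the deleted block did by hand, roughly the sketch below, reconstructed from the removed lines (the validate/fix_data_dir steps are elided):

  utils/perturb_data_dir_speed.sh 0.9 data/train_nodup data/temp1
  utils/perturb_data_dir_speed.sh 1.1 data/train_nodup data/temp2
  utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/train_nodup data/temp0
  utils/combine_data.sh data/train_nodup_sp data/temp0 data/temp1 data/temp2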
@@ -98,6 +98,8 @@ fi
 
 # ivector extractor training
 if [ $stage -le 5 ]; then
+  # Here it is good enough to train the lda_mllt transform with the clean data,
+  # as it only affects the diagonal GMM, which is just used to initialize the full GMM.
   steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
     --splice-opts "--left-context=3 --right-context=3" \
     5500 90000 data/train_100k_nodup_hires \

From 01e47f6c0e86879d7e2a77d5e49b14f3769e0dd6 Mon Sep 17 00:00:00 2001
From: Tom Ko
Date: Mon, 7 Nov 2016 21:55:48 -0500
Subject: [PATCH 9/9] Moving tuning/run_tdnn_7g.sh back to multi_condition/run_tdnn_7f.sh

---
 .../run_tdnn_7f.sh}                           | 14 ++++++++------
 egs/swbd/s5c/local/chain/run_tdnn.sh          |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)
 rename egs/swbd/s5c/local/chain/{tuning/run_tdnn_7g.sh => multi_condition/run_tdnn_7f.sh} (94%)

diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7f.sh
similarity index 94%
rename from egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
rename to egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7f.sh
index cf1343e5041..75b541b49e1 100755
--- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh
+++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7f.sh
@@ -1,13 +1,15 @@
 #!/bin/bash
 
-# 7g is as 7f, but reverberating the training data with room impulse responses
-# which leads to better results
+# This script (multi_condition/run_tdnn_7f.sh) is the reverberated version of
+# tuning/run_tdnn_7f.sh. It reverberates the training data with room impulse responses
+# which leads to better results.
+# (The reverberation of data is done in multi_condition/run_ivector_common.sh)
 # This script assumes a mixing of the original training data with its reverberated copy
 # and results in a 2-fold training set. Thus the number of epochs is halved to
 # keep the same training time. The model converges after 2 epochs of training;
 # the WER doesn't change much with more epochs of training.
-# local/chain/compare_wer.sh 7f 7g
-# System                      7f          7g
+# local/chain/compare_wer.sh tuning/7f multi_condition/7f
+# System                      tuning/7f   multi_condition/7f
 # WER on train_dev(tg)        14.46       14.27
 # WER on train_dev(fg)        13.23       13.16
 # WER on eval2000(tg)         17.0        16.3
 # WER on eval2000(fg)         15.4        14.6
@@ -26,7 +28,7 @@ stage=1
 train_stage=-10
 get_egs_stage=-10
 speed_perturb=true
-dir=exp/chain/tdnn_7g # Note: _sp will get added to this if $speed_perturb == true.
+dir=exp/chain/tdnn_7f # Note: _sp will get added to this if $speed_perturb == true.
 decode_iter=
 ivector_dir=exp/nnet3_rvb
 num_data_reps=1 # number of reverberated copies of data to generate
@@ -78,7 +80,7 @@ if [ "$speed_perturb" == "true" ]; then
   suffix=_sp
 fi
 
-dir=${dir}${affix:+_$affix}$suffix
+dir=${dir}${affix:+_$affix}${suffix}_rvb${num_data_reps}
 clean_train_set=${input_train_set}${suffix}
 train_set=${clean_train_set}_rvb${num_data_reps}
 ali_dir=exp/tri4_ali_nodup$suffix
diff --git a/egs/swbd/s5c/local/chain/run_tdnn.sh b/egs/swbd/s5c/local/chain/run_tdnn.sh
index 4b80e886c66..669740d5f27 120000
--- a/egs/swbd/s5c/local/chain/run_tdnn.sh
+++ b/egs/swbd/s5c/local/chain/run_tdnn.sh
@@ -1 +1 @@
-tuning/run_tdnn_7g.sh
\ No newline at end of file
+tuning/run_tdnn_7f.sh
\ No newline at end of file
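Review note: after the whole series is applied, the reverberated recipe ends up at local/chain/multi_condition/run_tdnn_7f.sh and the default entry point points back at the clean tuning script; a quick check (sketch):

  ls -l egs/swbd/s5c/local/chain/run_tdnn.sh
  # run_tdnn.sh -> tuning/run_tdnn_7f.sh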