From a46da4e6a5938692dce94057f4fb6fec2ff60f32 Mon Sep 17 00:00:00 2001 From: Jan Trmal Date: Thu, 14 Dec 2017 16:22:46 -0500 Subject: [PATCH 01/10] Adding chime5 baseline recipe --- egs/chime5/README.txt | 10 + egs/chime5/s5/RESULTS | 10 + egs/chime5/s5/cmd.sh | 15 + egs/chime5/s5/conf/chime5.cfg | 50 ++++ egs/chime5/s5/conf/decode.config | 2 + egs/chime5/s5/conf/mfcc.conf | 2 + egs/chime5/s5/conf/mfcc_hires.conf | 10 + egs/chime5/s5/conf/online_cmvn.conf | 1 + egs/chime5/s5/local/chain/compare_wer.sh | 131 ++++++++ egs/chime5/s5/local/chain/run_tdnn.sh | 1 + .../s5/local/chain/tuning/run_tdnn_1e.sh | 283 ++++++++++++++++++ egs/chime5/s5/local/check_tools.sh | 46 +++ egs/chime5/s5/local/json2text.py | 78 +++++ egs/chime5/s5/local/nnet3/compare_wer.sh | 132 ++++++++ .../s5/local/nnet3/run_ivector_common.sh | 149 +++++++++ egs/chime5/s5/local/prepare_data.sh | 117 ++++++++ egs/chime5/s5/local/prepare_dict.sh | 130 ++++++++ egs/chime5/s5/local/run_beamformit.sh | 89 ++++++ egs/chime5/s5/local/score.sh | 1 + egs/chime5/s5/local/train_lms_srilm.sh | 270 +++++++++++++++++ egs/chime5/s5/local/wer_output_filter | 25 ++ egs/chime5/s5/path.sh | 7 + egs/chime5/s5/run.sh | 237 +++++++++++++++ egs/chime5/s5/steps | 1 + egs/chime5/s5/utils | 1 + 25 files changed, 1798 insertions(+) create mode 100644 egs/chime5/README.txt create mode 100644 egs/chime5/s5/RESULTS create mode 100644 egs/chime5/s5/cmd.sh create mode 100755 egs/chime5/s5/conf/chime5.cfg create mode 100644 egs/chime5/s5/conf/decode.config create mode 100644 egs/chime5/s5/conf/mfcc.conf create mode 100644 egs/chime5/s5/conf/mfcc_hires.conf create mode 100644 egs/chime5/s5/conf/online_cmvn.conf create mode 100755 egs/chime5/s5/local/chain/compare_wer.sh create mode 120000 egs/chime5/s5/local/chain/run_tdnn.sh create mode 100755 egs/chime5/s5/local/chain/tuning/run_tdnn_1e.sh create mode 100755 egs/chime5/s5/local/check_tools.sh create mode 100755 egs/chime5/s5/local/json2text.py create mode 100755 
egs/chime5/s5/local/nnet3/compare_wer.sh create mode 100755 egs/chime5/s5/local/nnet3/run_ivector_common.sh create mode 100755 egs/chime5/s5/local/prepare_data.sh create mode 100755 egs/chime5/s5/local/prepare_dict.sh create mode 100755 egs/chime5/s5/local/run_beamformit.sh create mode 120000 egs/chime5/s5/local/score.sh create mode 100755 egs/chime5/s5/local/train_lms_srilm.sh create mode 100755 egs/chime5/s5/local/wer_output_filter create mode 100644 egs/chime5/s5/path.sh create mode 100755 egs/chime5/s5/run.sh create mode 120000 egs/chime5/s5/steps create mode 120000 egs/chime5/s5/utils diff --git a/egs/chime5/README.txt b/egs/chime5/README.txt new file mode 100644 index 00000000000..771857f9433 --- /dev/null +++ b/egs/chime5/README.txt @@ -0,0 +1,10 @@ +This is a Kaldi recipe for the 5th CHiME Speech Separation and Recognition Challenge (CHiME-5). + +The CHiME-5 challenge will consider the problem of distant multi-microphone +conversational speech recognition in everyday home environments. Speech material +was elicited using a dinner party scenario with efforts taken to capture data +that is representative of natural conversational speech. + +See http://spandh.dcs.shef.ac.uk/chime_challenge/ for more detailed information. 
+ +s5 : Default recipe diff --git a/egs/chime5/s5/RESULTS b/egs/chime5/s5/RESULTS new file mode 100644 index 00000000000..b57787a0798 --- /dev/null +++ b/egs/chime5/s5/RESULTS @@ -0,0 +1,10 @@ + +# tri2 +%WER 92.26 [ 60741 / 65835, 3212 ins, 35241 del, 22288 sub ] exp/tri2/decode_dev_beamformit_ref/wer_16_1.0 +%WER 76.47 [ 50342 / 65835, 4356 ins, 19004 del, 26982 sub ] exp/tri2/decode_dev_worn/wer_14_1.0 + +# tri3 +%WER 92.43 [ 60852 / 65835, 3149 ins, 35536 del, 22167 sub ] exp/tri3/decode_dev_beamformit_ref.si/wer_17_1.0 +%WER 90.80 [ 59779 / 65835, 4742 ins, 27968 del, 27069 sub ] exp/tri3/decode_dev_beamformit_ref/wer_17_1.0 +%WER 76.38 [ 50283 / 65835, 3911 ins, 19081 del, 27291 sub ] exp/tri3/decode_dev_worn.si/wer_17_1.0 +%WER 73.13 [ 48146 / 65835, 4727 ins, 17274 del, 26145 sub ] exp/tri3/decode_dev_worn/wer_16_1.0 diff --git a/egs/chime5/s5/cmd.sh b/egs/chime5/s5/cmd.sh new file mode 100644 index 00000000000..a697a22cda3 --- /dev/null +++ b/egs/chime5/s5/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
+ +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" + diff --git a/egs/chime5/s5/conf/chime5.cfg b/egs/chime5/s5/conf/chime5.cfg new file mode 100755 index 00000000000..70fdd858651 --- /dev/null +++ b/egs/chime5/s5/conf/chime5.cfg @@ -0,0 +1,50 @@ +#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/) + +# scrolling size to compute the delays +scroll_size = 250 + +# cross correlation computation window size +window_size = 500 + +#amount of maximum points for the xcorrelation taken into account +nbest_amount = 4 + +#flag whether to apply an automatic noise thresholding +do_noise_threshold = 1 + +#Percentage of frames with lower xcorr taken as noisy +noise_percent = 10 + +######## acoustic modelling parameters + +#transition probabilities weight for multichannel decoding +trans_weight_multi = 25 +trans_weight_nbest = 25 + +### + +#flag whether to print the features after setting them, or not +print_features = 1 + +#flag whether to use the bad frames in the sum process +do_avoid_bad_frames = 1 + +#flag to use the best channel (SNR) as a reference +#defined from command line +do_compute_reference = 1 + +#flag whether to use a uem file or not (process all the file) +do_use_uem_file = 0 + +#flag whether to use an adaptive weights scheme or fixed weights +do_adapt_weights = 1 + +#flag whether to output the sph files or just run the system to create the auxiliary files +do_write_sph_files = 1 + +####directories where to store/retrieve info#### +#channels_file = ./cfg-files/channels + +#show needs to be passed as argument normally, here a default one is given just in case +#show_id = Ttmp + diff --git a/egs/chime5/s5/conf/decode.config b/egs/chime5/s5/conf/decode.config new file mode 100644 index 00000000000..1940883b2f7 --- /dev/null +++ b/egs/chime5/s5/conf/decode.config @@ -0,0 +1,2 @@ +beam=11.0 # beam for decoding. Was 13.0 in the scripts. +first_beam=8.0 # beam for 1st-pass decoding in SAT. 
diff --git a/egs/chime5/s5/conf/mfcc.conf b/egs/chime5/s5/conf/mfcc.conf new file mode 100644 index 00000000000..32988403b00 --- /dev/null +++ b/egs/chime5/s5/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false +--sample-frequency=16000 diff --git a/egs/chime5/s5/conf/mfcc_hires.conf b/egs/chime5/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..fd64b62eb16 --- /dev/null +++ b/egs/chime5/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=16000 +--num-mel-bins=40 +--num-ceps=40 +--low-freq=40 +--high-freq=-400 diff --git a/egs/chime5/s5/conf/online_cmvn.conf b/egs/chime5/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/chime5/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/chime5/s5/local/chain/compare_wer.sh b/egs/chime5/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..cd6be14ed88 --- /dev/null +++ b/egs/chime5/s5/local/chain/compare_wer.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... 
]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix 
+ wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/chime5/s5/local/chain/run_tdnn.sh b/egs/chime5/s5/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..75da1a0a553 --- /dev/null +++ b/egs/chime5/s5/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1e.sh \ No newline at end of file diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1e.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1e.sh new file mode 100755 index 00000000000..ba8779bcc77 --- /dev/null +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1e.sh @@ -0,0 +1,283 @@ +#!/bin/bash + +# 1e is as 1d but instead of the --proportional-shrink option, using +# the newly added xconfig-layer-specific 'l2-regularize' options. + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=96 +train_set=train_worn_u100k +test_sets="dev_worn eval_worn dev_beamformit_ref eval_beamformit_ref" +gmm=tri3 +nnet3_affix=_train_worn_u100k +lm_suffix=_chime5_tg + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1e # affix for the TDNN directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj ${nj} --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + opts="l2-regularize=0.05" + output_opts="l2-regularize=0.01" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=512 + relu-batchnorm-layer name=tdnn2 $opts dim=512 input=Append(-1,0,1) + relu-batchnorm-layer name=tdnn3 $opts dim=512 + relu-batchnorm-layer name=tdnn4 $opts dim=512 input=Append(-1,0,1) + relu-batchnorm-layer name=tdnn5 $opts dim=512 + relu-batchnorm-layer name=tdnn6 $opts dim=512 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn7 $opts dim=512 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn8 $opts dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain $opts dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false $output_opts dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... 
this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn8 $opts dim=512 target-rms=0.5 + output-layer name=output-xent $output_opts dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/chime5-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=10 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=256,128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + 
--egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + $tree_dir $tree_dir/graph${lm_suffix} || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj 8 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ + $tree_dir/graph${lm_suffix} data/${data}_hires ${dir}/decode${lm_suffix}_${data} || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + +# Not testing the 'looped' decoding separately, because for +# TDNN systems it would give exactly the same results as the +# normal decoding. + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. 
+ steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh + +uconv=`command -v uconv 2>/dev/null` \ + || { echo >&2 "uconv not found on PATH. You will have to install ICU4C"; exit 1; } + +srilm=`command -v ngram 2>/dev/null` \ + || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh to install it"; exit 1; } + +sox=`command -v sox 2>/dev/null` \ + || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; exit 1; } + +# If sox is found on path, check if the version is correct +if [ ! -z "$sox" ]; then + sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'` + if [[ ! $sox_version =~ v14.4.* ]]; then + echo "Unsupported sox version $sox_version found on path. You will need version v14.4.0 and higher." + exit 1 + fi +fi + +phalign=`command -v phonetisaurus-align 2>/dev/null` \ + || { echo >&2 "Phonetisaurus not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_phonetisaurus.sh to install it"; exit 1; } + +beamformit=`command -v BeamformIt 2>/dev/null` \ + || { echo >&2 "BeamformIt not found on PATH. 
Please use the script $KALDI_ROOT/tools/extras/install_beamformit.sh to install it"; exit 1; } + +exit 0 + + diff --git a/egs/chime5/s5/local/json2text.py b/egs/chime5/s5/local/json2text.py new file mode 100755 index 00000000000..a3b81fd7067 --- /dev/null +++ b/egs/chime5/s5/local/json2text.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import json +import argparse +import logging +import sys + + +def hms_to_seconds(hms): + hour = hms.split(':')[0] + minute = hms.split(':')[1] + second = hms.split(':')[2].split('.')[0] + + # .xx (10 ms order) + ms10 = hms.split(':')[2].split('.')[1] + + # total seconds + seconds = int(hour) * 3600 + int(minute) * 60 + int(second) + + return '{:07d}'.format(int(str(seconds) + ms10)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('json', type=str, help='JSON transcription file') + parser.add_argument('--mictype', type=str, + choices=['ref', 'worn', 'u01', 'u02', 'u03', 'u04', 'u05', 'u06'], + help='Type of microphones') + args = parser.parse_args() + + # logging info + log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s:%(message)s" + logging.basicConfig(level=logging.INFO, format=log_format) + + logging.debug("reading %s", args.json) + with open(args.json, 'rt', encoding="utf-8") as f: + j = json.load(f) + + for x in j: + if '[redacted]' not in x['words']: + session_id = x['session_id'] + speaker_id = x['speaker'] + if args.mictype == 'ref': + mictype = x['ref'] + elif args.mictype == 'worn': + mictype = 'original' + else: + mictype = args.mictype.upper() # convert from u01 to U01 + + start_time = x['start_time'][mictype] + end_time = x['end_time'][mictype] + + # remove meta chars and convert to lower + words = x['words'].replace('"', '')\ + .replace('.', '')\ + .replace('?', '')\ + .replace(',', '')\ + .replace(':', '')\ + .replace(';', '')\ + .replace('!', 
'').lower() + + # remove multiple spaces + words = " ".join(words.split()) + + # convert to seconds, e.g., 1:10:05.55 -> 3600 + 600 + 5.55 = 4205.55 + start_time = hms_to_seconds(start_time) + end_time = hms_to_seconds(end_time) + + if args.mictype == 'worn': + uttid = speaker_id + '_' + session_id + '-' + start_time + '-' + end_time + else: + uttid = speaker_id + '_' + session_id + '_' + mictype + '-' + start_time + '-' + end_time + + if end_time > start_time: + sys.stdout.buffer.write((uttid + ' ' + words + '\n').encode("utf-8")) diff --git a/egs/chime5/s5/local/nnet3/compare_wer.sh b/egs/chime5/s5/local/nnet3/compare_wer.sh new file mode 100755 index 00000000000..095e85cc338 --- /dev/null +++ b/egs/chime5/s5/local/nnet3/compare_wer.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/chime5/s5/local/nnet3/run_ivector_common.sh b/egs/chime5/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..58f29f479bc --- /dev/null +++ b/egs/chime5/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,149 @@ +#!/bin/bash + +set -euo pipefail + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. + +stage=0 +train_set=train_worn_u100k +test_sets="dev_worn eval_worn dev_beamformit_ref eval_beamformit_ref" +gmm=tri3 +nj=96 + +nnet3_affix=_train_worn_u100k + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! 
-f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj ${nj} --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/mfcc/chime5-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. 
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l &2 "$0" "$@" +if [ $# -ne 3 ] ; then + echo >&2 "$0" "$@" + echo >&2 "$0: Error: wrong number of arguments" + echo -e >&2 "Usage:\n $0 [opts] " + echo -e >&2 "eg:\n $0 /corpora/chime5/audio/train /corpora/chime5/transcriptions/train data/train" + exit 1 +fi + +adir=$1 +jdir=$2 +dir=$3 + +echo "$0: Converting transcription to text" + +mkdir -p $dir +for file in $jdir/*json; do + ./local/json2text.py --mictype $mictype $file +done | \ + sed -e "s/\[inaudible[- 0-9]*\]/[inaudible]/g" |\ + sed -e 's/ - / /g' |\ + sed -e 's/mm-/mm/g' > $dir/text.orig + +echo "$0: Creating datadir $dir for type=\"$mictype\"" + +if [ $mictype == "worn" ]; then + # convert the filenames to wav.scp format, use the basename of the file + # as a the wav.scp key, add _L and _R for left and right channel + # i.e. 
each file will have two entries (left and right channel) + find $adir -name "S[0-9]*_P[0-9]*.wav" | \ + perl -ne '{ + chomp; + $path = $_; + next unless $path; + @F = split "/", $path; + ($f = $F[@F-1]) =~ s/.wav//; + @F = split "_", $f; + print "${F[1]}_${F[0]}_L sox $path -t wav - remix 1 |\n"; + print "${F[1]}_${F[0]}_R sox $path -t wav - remix 2 |\n"; + }' | sort > $dir/wav.scp + + # generate the transcripts for both left and right channel + # from the original transcript in the form + # P09_S03-0006072-0006147 gimme the baker + # create left and right channel transcript + # P09_S03_L-0006072-0006147 gimme the baker + # P09_S03_R-0006072-0006147 gimme the baker + sed -n 's/ *$//; h; s/-/_L-/p; g; s/-/_R-/p' $dir/text.orig | sort > $dir/text +elif [ $mictype == "ref" ]; then + # fixed reference array + + # first get a text, which will be used to extract reference arrays + perl -ne 's/-/.ENH-/;print;' $dir/text.orig | sort > $dir/text + + find $adir | grep "\.wav" | sort > $dir/wav.flist + # following command provide the argument for grep to extract only reference arrays + grep `cut -f 1 -d"-" $dir/text | awk -F"_" '{print $2 "_" $3}' | sed -e "s/\.ENH//" | sort | uniq | sed -e "s/^/ -e /" | tr "\n" " "` $dir/wav.flist > $dir/wav.flist2 + paste -d" " \ + <(awk -F "/" '{print $NF}' $dir/wav.flist2 | sed -e "s/\.wav/.ENH/") \ + $dir/wav.flist2 | sort > $dir/wav.scp +else + # array mic case + # convert the filenames to wav.scp format, use the basename of the file + # as a the wav.scp key + find $adir -name "*.wav" -ipath "*${mictype}*" |\ + perl -ne '$p=$_;chomp $_;@F=split "/";$F[$#F]=~s/\.wav//;print "$F[$#F] $p";' |\ + sort -u > $dir/wav.scp + + # convert the transcripts from + # P09_S03-0006072-0006147 gimme the baker + # to the per-channel transcripts + # P09_S03_U01.CH1-0006072-0006147 gimme the baker + # P09_S03_U01.CH2-0006072-0006147 gimme the baker + # P09_S03_U01.CH3-0006072-0006147 gimme the baker + # P09_S03_U01.CH4-0006072-0006147 gimme the baker + 
perl -ne '$l=$_; + for($i=1; $i<=4; $i++) { + ($x=$l)=~ s/-/.CH\Q$i\E-/; + print $x;}' $dir/text.orig | sort > $dir/text + +fi +$cleanup && rm -f $dir/text.* $dir/wav.scp.* $dir/wav.flist + +# Prepare 'segments', 'utt2spk', 'spk2utt' +if [ $mictype == "worn" ]; then + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' \ + > $dir/segments +elif [ $mictype == "ref" ]; then + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/ P.._/ /" > $dir/segments +else + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e 's/ P.._/ /' > $dir/segments +fi +cut -f 1 -d ' ' $dir/segments | \ + perl -ne 'chomp;$utt=$_;s/_.*//;print "$utt $_\n";' > $dir/utt2spk + +utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt + +# Check that data dirs are okay! +utils/validate_data_dir.sh --no-feats $dir || exit 1 diff --git a/egs/chime5/s5/local/prepare_dict.sh b/egs/chime5/s5/local/prepare_dict.sh new file mode 100755 index 00000000000..31d5ff9c77c --- /dev/null +++ b/egs/chime5/s5/local/prepare_dict.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +. ./utils/parse_options.sh + +. ./path.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + + +# The parts of the output of this that will be needed are +# [in data/local/dict/ ] +# lexicon.txt +# extra_questions.txt +# nonsilence_phones.txt +# optional_silence.txt +# silence_phones.txt + + +# check existing directories +[ $# != 0 ] && echo "Usage: $0" && exit 1; + +# This script also needs the phonetisaurus g2p, srilm,subversion, +# and ICU4C installed. We test for these things during the kaldi instalation +# and during when the master script is run, so we do not run any tests here. +. 
./local/check_tools.sh + +dir=data/local/dict_nosp + +mkdir -p $dir +echo "$0: Getting CMU dictionary" +if [ ! -f $dir/cmudict.done ]; then + [ -d $dir/cmudict ] && rm -rf $dir/cmudict + svn co https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict $dir/cmudict + touch $dir/cmudict.done +fi + +# silence phones, one per line. +for w in sil spn inaudible laughs noise; do + echo $w; +done > $dir/silence_phones.txt +echo sil > $dir/optional_silence.txt + +# For this setup we're discarding stress. +cat $dir/cmudict/cmudict-0.7b.symbols | \ + perl -ne 's:[0-9]::g; s:\r::; print lc($_)' | \ + sort -u > $dir/nonsilence_phones.txt + +# An extra question will be added by including the silence phones in one class. +paste -d ' ' -s $dir/silence_phones.txt > $dir/extra_questions.txt + +grep -v ';;;' $dir/cmudict/cmudict-0.7b |\ + uconv -f latin1 -t utf-8 -x Any-Lower |\ + perl -ne 's:(\S+)\(\d+\) :$1 :; s: : :; print;' |\ + perl -ne '@F = split " ",$_,2; $F[1] =~ s/[0-9]//g; print "$F[0] $F[1]";' \ + > $dir/lexicon1_raw_nosil.txt || exit 1; + +# Add prons for laughter, noise, oov +for w in `grep -v sil $dir/silence_phones.txt`; do + echo "[$w] $w" +done | cat - $dir/lexicon1_raw_nosil.txt > $dir/lexicon2_raw.txt || exit 1; + +# we keep all words from the cmudict in the lexicon +# might reduce OOV rate on dev and eval +cat $dir/lexicon2_raw.txt \ + <( echo "mm m" + echo " spn" + echo "cuz k aa z" + echo "cuz k ah z" + echo "cuz k ao z" + echo "mmm m"; \ + echo "hmm hh m"; \ + ) | sort -u | sed 's/[\t ]/\t/' > $dir/iv_lexicon.txt + + +cat data/train*/text | \ + awk '{for (n=2;n<=NF;n++){ count[$n]++; } } END { for(n in count) { print count[n], n; }}' | \ + sort -nr > $dir/word_counts + +cat $dir/word_counts | awk '{print $2}' > $dir/word_list + +awk '{print $1}' $dir/iv_lexicon.txt | \ + perl -e '($word_counts)=@ARGV; + open(W, "<$word_counts")||die "opening word-counts $word_counts"; + while() { chop; $seen{$_}=1; } + while() { + ($c,$w) = split; + if (!defined $seen{$w}) { 
print; } + } ' $dir/word_counts > $dir/oov_counts.txt + +set -x +echo "*Highest-count OOVs (including fragments) are:" +head -n 10 $dir/oov_counts.txt +echo "*Highest-count OOVs (excluding fragments) are:" +grep -v -E '^-|-$' $dir/oov_counts.txt | head -n 10 || true + +echo "*Training a G2P and generating missing pronunciations" +mkdir -p $dir/g2p/ +phonetisaurus-align --input=$dir/iv_lexicon.txt --ofile=$dir/g2p/aligned_lexicon.corpus +ngram-count -order 4 -kn-modify-counts-at-end -ukndiscount\ + -gt1min 0 -gt2min 0 -gt3min 0 -gt4min 0 \ + -text $dir/g2p/aligned_lexicon.corpus -lm $dir/g2p/aligned_lexicon.arpa +phonetisaurus-arpa2wfst --lm=$dir/g2p/aligned_lexicon.arpa --ofile=$dir/g2p/g2p.fst +awk '{print $2}' $dir/oov_counts.txt > $dir/oov_words.txt +phonetisaurus-apply --nbest 2 --model $dir/g2p/g2p.fst --thresh 5 --accumulate \ + --word_list $dir/oov_words.txt > $dir/oov_lexicon.txt + +## The next section is again just for debug purposes +## to show words for which the G2P failed +cat $dir/oov_lexicon.txt $dir/iv_lexicon.txt | sort -u > $dir/lexicon.txt +rm -f $dir/lexiconp.txt 2>/dev/null; # can confuse later script if this exists. +awk '{print $1}' $dir/lexicon.txt | \ + perl -e '($word_counts)=@ARGV; + open(W, "<$word_counts")||die "opening word-counts $word_counts"; + while() { chop; $seen{$_}=1; } + while() { + ($c,$w) = split; + if (!defined $seen{$w}) { print; } + } ' $dir/word_counts > $dir/oov_counts.g2p.txt + +echo "*Highest-count OOVs (including fragments) after G2P are:" +head -n 10 $dir/oov_counts.g2p.txt + +utils/validate_dict_dir.pl $dir +exit 0; + diff --git a/egs/chime5/s5/local/run_beamformit.sh b/egs/chime5/s5/local/run_beamformit.sh new file mode 100755 index 00000000000..78f740339fa --- /dev/null +++ b/egs/chime5/s5/local/run_beamformit.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe) + +. ./cmd.sh +. ./path.sh + +# Config: +cmd=run.pl +bmf="1 2 3 4" + +. 
utils/parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: local/run_beamformit.sh [options] " + echo "main options (for others, see top of script file)" + echo " --cmd # Command to run in parallel with" + echo " --bmf \"1 2 3 4\" # microphones used for beamforming" + exit 1; +fi + +sdir=$1 +odir=$2 +array=$3 +expdir=exp/enhan/`echo $odir | awk -F '/' '{print $NF}'`_`echo $bmf | tr ' ' '_'` + +if [ -z $BEAMFORMIT ] ; then + export BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt +fi +export PATH=${PATH}:$BEAMFORMIT +! hash BeamformIt && echo "Missing BeamformIt, run 'cd $KALDI_ROOT/tools/; ./extras/install_beamformit.sh; cd -;'" && exit 1 + +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +mkdir -p $odir +mkdir -p $expdir/log + +echo "Will use the following channels: $bmf" +# number of channels +numch=`echo $bmf | tr ' ' '\n' | wc -l` +echo "the number of channels: $numch" + +# wavfiles.list can be used as the name of the output files +output_wavfiles=$expdir/wavfiles.list +find ${sdir} | grep -i ${array} | awk -F "/" '{print $NF}' | sed -e "s/\.CH.\.wav//" | sort | uniq > $expdir/wavfiles.list + +# this is an input file list of the microphones +# format: 1st_wav 2nd_wav ... 
nth_wav +input_arrays=$expdir/channels_$numch +for x in `cat $output_wavfiles`; do + echo -n "$x" + for ch in $bmf; do + echo -n " $x.CH$ch.wav" + done + echo "" +done > $input_arrays + +# split the list for parallel processing +# number of jobs are set by the number of WAV files +nj=`wc -l $expdir/wavfiles.list | awk '{print $1}'` +split_wavfiles="" +for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" +done +utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + +echo -e "Beamforming\n" +# making a shell script for each job +for n in `seq $nj`; do +cat << EOF > $expdir/log/beamform.$n.sh +while read line; do + $BEAMFORMIT/BeamformIt -s \$line -c $input_arrays \ + --config_file `pwd`/conf/chime5.cfg \ + --source_dir $sdir \ + --result_dir $odir +done < $output_wavfiles.$n +EOF +done + +chmod a+x $expdir/log/beamform.*.sh +$cmd JOB=1:$nj $expdir/log/beamform.JOB.log \ + $expdir/log/beamform.JOB.sh + +echo "`basename $0` Done." diff --git a/egs/chime5/s5/local/score.sh b/egs/chime5/s5/local/score.sh new file mode 120000 index 00000000000..6a200b42ed3 --- /dev/null +++ b/egs/chime5/s5/local/score.sh @@ -0,0 +1 @@ +../steps/scoring/score_kaldi_wer.sh \ No newline at end of file diff --git a/egs/chime5/s5/local/train_lms_srilm.sh b/egs/chime5/s5/local/train_lms_srilm.sh new file mode 100755 index 00000000000..09bba818ba6 --- /dev/null +++ b/egs/chime5/s5/local/train_lms_srilm.sh @@ -0,0 +1,270 @@ +#!/bin/bash +# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe) +# Apache 2.0 + +export LC_ALL=C + +# Begin configuration section. +words_file= +train_text= +dev_text= +oov_symbol="" +# End configuration section + +echo "$0 $@" + +[ -f path.sh ] && . ./path.sh +. ./utils/parse_options.sh || exit 1 + +echo "-------------------------------------" +echo "Building an SRILM language model " +echo "-------------------------------------" + +if [ $# -ne 2 ] ; then + echo "Incorrect number of parameters. 
" + echo "Script has to be called like this:" + echo " $0 [switches] " + echo "For example: " + echo " $0 data data/srilm" + echo "The allowed switches are: " + echo " words_file= word list file -- data/lang/words.txt by default" + echo " train_text= data/train/text is used in case when not specified" + echo " dev_text= last 10 % of the train text is used by default" + echo " oov_symbol=> symbol to use for oov modeling -- by default" + exit 1 +fi + +datadir=$1 +tgtdir=$2 + +##End of configuration +loc=`which ngram-count`; +if [ -z $loc ]; then + if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... + sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + else + sdir=`pwd`/../../../tools/srilm/bin/i686 + fi + if [ -f $sdir/ngram-count ]; then + echo Using SRILM tools from $sdir + export PATH=$PATH:$sdir + else + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. + exit 1 + fi +fi + +# Prepare the destination directory +mkdir -p $tgtdir + +for f in $words_file $train_text $dev_text; do + [ ! -s $f ] && echo "No such file $f" && exit 1; +done + +[ -z $words_file ] && words_file=$datadir/lang/words.txt +if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=`cat $train_text | wc -l` + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using words file: $words_file" + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +elif [ ! -z "$train_text" ] && [ ! 
-z "$dev_text" ] ; then + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + train_text=$train_text + dev_text=$dev_text +else + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + +fi + +[ ! -f $words_file ] && echo >&2 "File $words_file must exist!" && exit 1 +[ ! -f $train_text ] && echo >&2 "File $train_text must exist!" && exit 1 +[ ! -f $dev_text ] && echo >&2 "File $dev_text must exist!" && exit 1 + + +# Extract the word list from the training dictionary; exclude special symbols +sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab +if (($?)); then + echo "Failed to create vocab from $words_file" + exit 1 +else + # wc vocab # doesn't work due to some encoding issues + echo vocab contains `cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +# We also have to avoid skewing the LM by incorporating the same sentences +# from different channels +sed -e "s/\.CH.//" -e "s/_.\-./_/" $train_text | sort -u | \ + perl -ane 'print join(" ", @F[1..$#F]) . 
"\n" if @F > 1' > $tgtdir/train.txt +if (($?)); then + echo "Failed to create $tgtdir/train.txt from $train_text" + exit 1 +else + echo "Removed first word (uid) from every line of $train_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + +set -x +# Kaldi transcript files contain Utterance_ID as the first word; remove it +sed -e "s/\.CH.//" -e "s/_.\-./_/" $dev_text | sort -u | \ + perl -ane 'print join(" ", @F[1..$#F]) . "\n" if @F > 1' > $tgtdir/dev.txt +if (($?)); then + echo "Failed to create $tgtdir/dev.txt from $dev_text" + exit 1 +else + echo "Removed first word (uid) from every line of $dev_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + + +echo "-------------------" +echo "Good-Turing 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo 
"-------------------" +echo "Kneser-Ney 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn111.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn112.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn122.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn123.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + + +echo "-------------------" +echo "Good-Turing 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.gt0111.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 
4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz \ + -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz \ + -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz \ + -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz \ + -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.kn0111.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 
-gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0223.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +if [ ! -z ${LIBLBFGS} ]; then + #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault + #instead of that, we simply output the model in the maxent format and convert it using the "ngram" + echo "-------------------" + echo "Maxent 3grams" + echo "-------------------" + sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ + ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ + ngram -lm - -order 3 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ + sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1 + + echo "-------------------" + echo "Maxent 4grams" + echo "-------------------" + sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ + ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ + ngram -lm - -order 4 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ + sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1 +else + echo "Skipping MaxEnt models" +fi + + +echo "--------------------" +echo "Computing perplexity" +echo 
"--------------------" +( + for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done + for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done +) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt + +echo "The perlexity scores report is stored in $tgtdir/perplexities.txt " +echo "" + +for best_ngram in {3,4}gram ; do + outlm=best_${best_ngram}.gz + lmfilename=$(grep "${best_ngram}" $tgtdir/perplexities.txt | head -n 1 | cut -f 1 -d ' ') + echo "$outlm -> $lmfilename" + (cd $tgtdir; rm -f $outlm; ln -sf $(basename $lmfilename) $outlm ) +done diff --git a/egs/chime5/s5/local/wer_output_filter b/egs/chime5/s5/local/wer_output_filter new file mode 100755 index 00000000000..6f4b6400716 --- /dev/null +++ b/egs/chime5/s5/local/wer_output_filter @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal ) +# Apache 2.0 + + +## Filter for scoring of the STT results. Convert everything to lowercase +## and add some ad-hoc fixes for the hesitations + +perl -e ' + while() { + @A = split(" ", $_); + $id = shift @A; print "$id "; + foreach $a (@A) { + print lc($a) . " " unless $a =~ /\[.*\]/; + } + print "\n"; + }' | \ +sed -e ' + s/\/hmm/g; + s/\/hmm/g; + s/\/hmm/g; +' + +#| uconv -f utf-8 -t utf-8 -x Latin-ASCII + diff --git a/egs/chime5/s5/path.sh b/egs/chime5/s5/path.sh new file mode 100644 index 00000000000..fb1c0489386 --- /dev/null +++ b/egs/chime5/s5/path.sh @@ -0,0 +1,7 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. 
$KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C + diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh new file mode 100755 index 00000000000..c3b724fe5b4 --- /dev/null +++ b/egs/chime5/s5/run.sh @@ -0,0 +1,237 @@ +#!/bin/bash +# +# Based mostly on the TED-LIUM and Switchboard recipe +# +# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal) +# Apache 2.0 +# + +# Begin configuration section. +nj=96 +decode_nj=20 +stage=0 +enhancement=beamformit # for a new enhancement method, + # change this variable and stage 4 +# End configuration section +. ./utils/parse_options.sh + +. ./cmd.sh +. ./path.sh + + +set -e # exit on error + +# chime5 main directory path +# please change the path accordingly +chime5_corpus=/export/corpora4/CHiME5 +json_dir=${chime5_corpus}/data/transcriptions +audio_dir=${chime5_corpus}/data/audio + +# training and test data +train_set=train_worn_u100k +test_sets="dev_worn dev_${enhancement}_ref" +# use the below once you obtain the evaluation data. 
Also remove the comment #eval# in the lines below +#eval#test_sets="dev_worn eval_worn dev_${enhancement}_ref eval_${enhancement}_ref" + +./local/check_tools.sh || exit 1 + +if [ $stage -le 1 ]; then + # skip u03 as they are missing + for mictype in worn u01 u02 u04 u05 u06; do + local/prepare_data.sh --mictype ${mictype} \ + ${audio_dir}/train ${json_dir}/train data/train_${mictype} + done + #eval#for dataset in dev eval; do + for dataset in dev; do + for mictype in worn; do + local/prepare_data.sh --mictype ${mictype} \ + ${audio_dir}/${dataset} ${json_dir}/${dataset} \ + data/${dataset}_${mictype} + done + done +fi + +if [ $stage -le 2 ]; then + local/prepare_dict.sh + + utils/prepare_lang.sh \ + data/local/dict_nosp "" data/local/lang_nosp data/lang_nosp + + local/train_lms_srilm.sh \ + --train-text data/train_worn/text --dev-text data/dev_worn/text \ + --oov-symbol "" --words-file data/lang_nosp/words.txt \ + data/ data/srilm +fi + +LM=data/srilm/best_3gram.gz +if [ $stage -le 3 ]; then + # Compiles G for chime5 trigram LM + utils/format_lm.sh \ + data/lang_nosp $LM data/local/dict_nosp/lexicon.txt data/lang_nosp_test + +fi + +if [ $stage -le 4 ]; then + # Beamforming using reference arrays + # enhanced WAV directory + enhandir=enhan + #eval#for dset in dev eval; do + for dset in dev; do + for mictype in u01 u02 u03 u04 u05 u06; do + local/run_beamformit.sh --cmd "$train_cmd" \ + ${audio_dir}/${dset} \ + ${enhandir}/${dset}_${enhancement}_${mictype} \ + ${mictype} + done + done + + #eval#for dset in dev eval; do + for dset in dev; do + local/prepare_data.sh --mictype ref "$PWD/${enhandir}/${dset}_${enhancement}_u0*" \ + ${json_dir}/${dset} data/${dset}_${enhancement}_ref + done +fi + +if [ $stage -le 5 ]; then + # remove possibly bad sessions (P11_S03, P52_S19, P53_S24, P54_S24) + utils/copy_data_dir.sh data/train_worn data/train_worn_org # back up + grep -v -e "^P11_S03" -e "^P52_S19" -e "^P53_S24" -e "^P54_S24" data/train_worn_org/text > data/train_worn/text 
+ utils/fix_data_dir.sh data/train_worn + + # combine mix array and worn mics + # randomly extract first 100k utterances from all mics + # If you want to include more training data, you can increase the number of array mic utterances + utils/combine_data.sh data/train_uall data/train_u01 data/train_u02 data/train_u04 data/train_u05 data/train_u06 + utils/subset_data_dir.sh data/train_uall 100000 data/train_u100k + utils/combine_data.sh data/${train_set} data/train_worn data/train_u100k + + # only use left channel for worn mic recognition + # you can use both left and right channels for training + #eval#for dset in train dev eval; do + for dset in train dev; do + utils/copy_data_dir.sh data/${dset}_worn data/${dset}_worn_stereo + grep "_L-" data/${dset}_worn_stereo/text > data/${dset}_worn/text + utils/fix_data_dir.sh data/${dset}_worn + done +fi + +if [ $stage -le 6 ]; then + # Split speakers up into 3-minute chunks. This doesn't hurt adaptation, and + # lets us use more jobs for decoding etc. + for dset in ${train_set} ${test_sets}; do + utils/copy_data_dir.sh data/${dset} data/${dset}_nosplit + utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_nosplit data/${dset} + done +fi + +if [ $stage -le 7 ]; then + # Now make MFCC features. + # mfccdir should be some place with a largish disk where you + # want to store MFCC features. 
+ mfccdir=mfcc + for x in ${train_set} ${test_sets}; do + steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" \ + data/$x exp/make_mfcc/$x $mfccdir + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir + utils/fix_data_dir.sh data/$x + done +fi + +if [ $stage -le 8 ]; then + # make a subset for monophone training + utils/subset_data_dir.sh --shortest data/${train_set} 100000 data/${train_set}_100kshort + utils/subset_data_dir.sh data/${train_set}_100kshort 30000 data/${train_set}_30kshort +fi + +if [ $stage -le 9 ]; then + # Starting basic training on MFCC features + steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set}_30kshort data/lang_nosp exp/mono +fi + +if [ $stage -le 10 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang_nosp exp/mono exp/mono_ali + + steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/${train_set} data/lang_nosp exp/mono_ali exp/tri1 +fi + +if [ $stage -le 11 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang_nosp exp/tri1 exp/tri1_ali + + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + 4000 50000 data/${train_set} data/lang_nosp exp/tri1_ali exp/tri2 +fi + +if [ $stage -le 12 ]; then + utils/mkgraph.sh data/lang_nosp_test exp/tri2 exp/tri2/graph_nosp + for dset in ${test_sets}; do + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + exp/tri2/graph_nosp data/${dset} exp/tri2/decode_${dset}_nosp + done +fi + +if [ $stage -le 13 ]; then + # create a more refined lexicon (include pronunciation probabilities) + steps/get_prons.sh --cmd "$train_cmd" \ + data/${train_set} data/lang_nosp exp/tri2 + + utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_nosp exp/tri2/pron_counts_nowb.txt \ + exp/tri2/sil_counts_nowb.txt \ + exp/tri2/pron_bigram_counts_nowb.txt data/local/dict + + # add explicit phone loop for model + utils/lang/make_unk_lm.sh --use-pocolm false \ + data/local/dict exp/make_unk + + # and 
compile the lang directory + utils/prepare_lang.sh \ + --unk-fst exp/make_unk/unk_fst.txt \ + --phone-symbol-table data/lang_nosp/phones.txt \ + data/local/dict "" data/local/lang_test data/lang_test + + # and convert the LM in arpa to G.fst + utils/format_lm.sh \ + data/lang_test $LM data/local/dict/lexicon.txt data/lang_test +fi + +if [ $stage -le 14 ]; then + utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph + for dset in ${test_sets}; do + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + exp/tri2/graph data/${dset} exp/tri2/decode_${dset} & + done + wait +fi + +if [ $stage -le 15 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/tri2 exp/tri2_ali + + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 data/${train_set} data/lang exp/tri2_ali exp/tri3 +fi + +if [ $stage -le 16 ]; then + utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph + for dset in ${test_sets}; do + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + exp/tri3/graph data/${dset} exp/tri3/decode_${dset} & + done + wait +fi + +if [ $stage -le 17 ]; then + # The following script cleans the data and produces cleaned data + steps/cleanup/clean_and_segment_data.sh --nj ${nj} --cmd "$train_cmd" \ + --segmentation-opts "--min-segment-length 0.3 --min-new-segment-length 0.6" \ + data/${train_set} data/lang exp/tri3 exp/tri3_cleaned data/${train_set}_cleaned +fi + +if [ $stage -le 18 ]; then + # chain TDNN + local/chain/run_tdnn.sh --nj ${nj} --test_sets "$test_sets" +fi diff --git a/egs/chime5/s5/steps b/egs/chime5/s5/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/chime5/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/chime5/s5/utils b/egs/chime5/s5/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/chime5/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file From 
b09ee9c54bb28ee2b4334b871b7a4fd9352d613f Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Wed, 7 Mar 2018 15:56:51 -0500 Subject: [PATCH 02/10] [egs] fixed a data path and bug in data prep at chime5 --- egs/chime5/s5/local/prepare_dict.sh | 5 ----- egs/chime5/s5/run.sh | 5 +++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/egs/chime5/s5/local/prepare_dict.sh b/egs/chime5/s5/local/prepare_dict.sh index 31d5ff9c77c..468f2e915d0 100755 --- a/egs/chime5/s5/local/prepare_dict.sh +++ b/egs/chime5/s5/local/prepare_dict.sh @@ -24,11 +24,6 @@ set -o nounset # Treat unset variables as an error # check existing directories [ $# != 0 ] && echo "Usage: $0" && exit 1; -# This script also needs the phonetisaurus g2p, srilm,subversion, -# and ICU4C installed. We test for these things during the kaldi instalation -# and during when the master script is run, so we do not run any tests here. -. ./local/check_tools.sh - dir=data/local/dict_nosp mkdir -p $dir diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index c3b724fe5b4..077a60654c4 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -24,8 +24,8 @@ set -e # exit on error # chime5 main directory path # please change the path accordingly chime5_corpus=/export/corpora4/CHiME5 -json_dir=${chime5_corpus}/data/transcriptions -audio_dir=${chime5_corpus}/data/audio +json_dir=${chime5_corpus}/transcriptions +audio_dir=${chime5_corpus}/audio # training and test data train_set=train_worn_u100k @@ -33,6 +33,7 @@ test_sets="dev_worn dev_${enhancement}_ref" # use the below once you obtain the evaluation data. 
Also remove the comment #eval# in the lines below #eval#test_sets="dev_worn eval_worn dev_${enhancement}_ref eval_${enhancement}_ref" +# This script also needs the phonetisaurus g2p, srilm, beamformit ./local/check_tools.sh || exit 1 if [ $stage -le 1 ]; then From 6c6918947df0035e3502df522b2187def58acada Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Thu, 8 Mar 2018 08:51:35 -0500 Subject: [PATCH 03/10] [egs] fixed a wrong directory name in data/lang --- egs/chime5/s5/run.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index 077a60654c4..11824af2c31 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -179,28 +179,28 @@ if [ $stage -le 13 ]; then steps/get_prons.sh --cmd "$train_cmd" \ data/${train_set} data/lang_nosp exp/tri2 - utils/dict_dir_add_pronprobs.sh --max-normalize true \ - data/local/dict_nosp exp/tri2/pron_counts_nowb.txt \ + utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_nosp exp/tri2/pron_counts_nowb.txt \ exp/tri2/sil_counts_nowb.txt \ exp/tri2/pron_bigram_counts_nowb.txt data/local/dict # add explicit phone loop for model utils/lang/make_unk_lm.sh --use-pocolm false \ - data/local/dict exp/make_unk + data/local/dict exp/make_unk # and compile the lang directory utils/prepare_lang.sh \ --unk-fst exp/make_unk/unk_fst.txt \ - --phone-symbol-table data/lang_nosp/phones.txt \ - data/local/dict "" data/local/lang_test data/lang_test + --phone-symbol-table data/lang_nosp/phones.txt \ + data/local/dict "" data/local/lang data/lang # and convert the LM in arpa to G.fst utils/format_lm.sh \ - data/lang_test $LM data/local/dict/lexicon.txt data/lang_test + data/lang $LM data/local/dict/lexicon.txt data/lang fi if [ $stage -le 14 ]; then - utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph + utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph for dset in ${test_sets}; do steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" 
--num-threads 4 \ exp/tri2/graph data/${dset} exp/tri2/decode_${dset} & @@ -217,7 +217,7 @@ if [ $stage -le 15 ]; then fi if [ $stage -le 16 ]; then - utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph + utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph for dset in ${test_sets}; do steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ exp/tri3/graph data/${dset} exp/tri3/decode_${dset} & From 5a3b8b15c3e5ac05e615ed70b7972679ed51ffe1 Mon Sep 17 00:00:00 2001 From: Jan Trmal Date: Thu, 8 Mar 2018 11:05:56 -0500 Subject: [PATCH 04/10] fixing some issues --- .../s5/conf/{chime5.cfg => beamformit.cfg} | 0 egs/chime5/s5/conf/decode.config | 2 -- egs/chime5/s5/local/check_tools.sh | 15 ++++++++---- egs/chime5/s5/local/prepare_data.sh | 16 +++++++++++++ egs/chime5/s5/local/prepare_dict.sh | 1 - egs/chime5/s5/local/run_beamformit.sh | 8 +++---- egs/chime5/s5/local/train_lms_srilm.sh | 23 ++++++------------- 7 files changed, 37 insertions(+), 28 deletions(-) rename egs/chime5/s5/conf/{chime5.cfg => beamformit.cfg} (100%) delete mode 100644 egs/chime5/s5/conf/decode.config diff --git a/egs/chime5/s5/conf/chime5.cfg b/egs/chime5/s5/conf/beamformit.cfg similarity index 100% rename from egs/chime5/s5/conf/chime5.cfg rename to egs/chime5/s5/conf/beamformit.cfg diff --git a/egs/chime5/s5/conf/decode.config b/egs/chime5/s5/conf/decode.config deleted file mode 100644 index 1940883b2f7..00000000000 --- a/egs/chime5/s5/conf/decode.config +++ /dev/null @@ -1,2 +0,0 @@ -beam=11.0 # beam for decoding. Was 13.0 in the scripts. -first_beam=8.0 # beam for 1st-pass decoding in SAT. diff --git a/egs/chime5/s5/local/check_tools.sh b/egs/chime5/s5/local/check_tools.sh index ef2fe9d5e5d..ff2c53ea3d8 100755 --- a/egs/chime5/s5/local/check_tools.sh +++ b/egs/chime5/s5/local/check_tools.sh @@ -17,12 +17,19 @@ [ -f ./path.sh ] && . ./path.sh -uconv=`command -v uconv 2>/dev/null` \ +command -v uconv &>/dev/null \ || { echo >&2 "uconv not found on PATH. 
You will have to install ICU4C"; exit 1; } -srilm=`command -v ngram 2>/dev/null` \ +command -v ngram &>/dev/null \ || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh to install it"; exit 1; } +if [ -z ${LIBLBFGS} ]; then + echo >&2 "SRILM is not compiled with the support of MaxEnt models." + echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" + echo >&2 "which will take care of compiling the SRILM with MaxEnt support" + exit 1; +fi + sox=`command -v sox 2>/dev/null` \ || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; exit 1; } @@ -35,10 +42,10 @@ if [ ! -z "$sox" ]; then fi fi -phalign=`command -v phonetisaurus-align 2>/dev/null` \ +command -v phonetisaurus-align &>/dev/null \ || { echo >&2 "Phonetisaurus not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_phonetisaurus.sh to install it"; exit 1; } -beamformit=`command -v BeamformIt 2>/dev/null` \ +command -v BeamformIt &>/dev/null \ || { echo >&2 "BeamformIt not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_beamformit.sh to install it"; exit 1; } exit 0 diff --git a/egs/chime5/s5/local/prepare_data.sh b/egs/chime5/s5/local/prepare_data.sh index c273b1b42f2..af37b0841fa 100755 --- a/egs/chime5/s5/local/prepare_data.sh +++ b/egs/chime5/s5/local/prepare_data.sh @@ -20,10 +20,26 @@ if [ $# -ne 3 ] ; then exit 1 fi +set -e -o pipefail + adir=$1 jdir=$2 dir=$3 +json_count=$(find $jdir -name "*.json" | wc -l) +wav_count=$(find $adir -name "*.wav" | wc -l) + +if [ "$json_count" -eq 0 ]; then + echo >&2 "We expect that the directory $jdir will contain json files." + echo >&2 "That implies you have supplied a wrong path to the data." + exit 1 +fi +if [ "$wav_count" -eq 0 ]; then + echo >&2 "We expect that the directory $adir will contain wav files." + echo >&2 "That implies you have supplied a wrong path to the data." 
+ exit 1 +fi + echo "$0: Converting transcription to text" mkdir -p $dir diff --git a/egs/chime5/s5/local/prepare_dict.sh b/egs/chime5/s5/local/prepare_dict.sh index 468f2e915d0..e2a16b92f7d 100755 --- a/egs/chime5/s5/local/prepare_dict.sh +++ b/egs/chime5/s5/local/prepare_dict.sh @@ -87,7 +87,6 @@ awk '{print $1}' $dir/iv_lexicon.txt | \ if (!defined $seen{$w}) { print; } } ' $dir/word_counts > $dir/oov_counts.txt -set -x echo "*Highest-count OOVs (including fragments) are:" head -n 10 $dir/oov_counts.txt echo "*Highest-count OOVs (excluding fragments) are:" diff --git a/egs/chime5/s5/local/run_beamformit.sh b/egs/chime5/s5/local/run_beamformit.sh index 78f740339fa..176fd108d5d 100755 --- a/egs/chime5/s5/local/run_beamformit.sh +++ b/egs/chime5/s5/local/run_beamformit.sh @@ -25,11 +25,9 @@ odir=$2 array=$3 expdir=exp/enhan/`echo $odir | awk -F '/' '{print $NF}'`_`echo $bmf | tr ' ' '_'` -if [ -z $BEAMFORMIT ] ; then - export BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt +if ! command -v BeamformIt &>/dev/null ; then + echo "Missing BeamformIt, run 'cd $KALDI_ROOT/tools/; ./extras/install_beamformit.sh; cd -;'" && exit 1 fi -export PATH=${PATH}:$BEAMFORMIT -! hash BeamformIt && echo "Missing BeamformIt, run 'cd $KALDI_ROOT/tools/; ./extras/install_beamformit.sh; cd -;'" && exit 1 # Set bash to 'debug' mode, it will exit on : # -e 'error', -u 'undefined variable', -o ... 
'error in pipeline', -x 'print commands', @@ -75,7 +73,7 @@ for n in `seq $nj`; do cat << EOF > $expdir/log/beamform.$n.sh while read line; do $BEAMFORMIT/BeamformIt -s \$line -c $input_arrays \ - --config_file `pwd`/conf/chime5.cfg \ + --config_file `pwd`/conf/beamformit.cfg \ --source_dir $sdir \ --result_dir $odir done < $output_wavfiles.$n diff --git a/egs/chime5/s5/local/train_lms_srilm.sh b/egs/chime5/s5/local/train_lms_srilm.sh index 09bba818ba6..8caa251fa35 100755 --- a/egs/chime5/s5/local/train_lms_srilm.sh +++ b/egs/chime5/s5/local/train_lms_srilm.sh @@ -40,20 +40,9 @@ tgtdir=$2 ##End of configuration loc=`which ngram-count`; if [ -z $loc ]; then - if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... - sdir=`pwd`/../../../tools/srilm/bin/i686-m64 - else - sdir=`pwd`/../../../tools/srilm/bin/i686 - fi - if [ -f $sdir/ngram-count ]; then - echo Using SRILM tools from $sdir - export PATH=$PATH:$sdir - else - echo You appear to not have SRILM tools installed, either on your path, - echo or installed in $sdir. See tools/install_srilm.sh for installation - echo instructions. - exit 1 - fi + echo >&2 "You appear to not have SRILM tools installed, either on your path," + echo >&2 "Use the script \$KALDI_ROOT/tools/install_srilm.sh to install it." + exit 1 fi # Prepare the destination directory @@ -122,7 +111,6 @@ else echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` fi -set -x # Kaldi transcript files contain Utterance_ID as the first word; remove it sed -e "s/\.CH.//" -e "s/_.\-./_/" $dev_text | sort -u | \ perl -ane 'print join(" ", @F[1..$#F]) . "\n" if @F > 1' > $tgtdir/dev.txt @@ -247,7 +235,10 @@ if [ ! 
-z ${LIBLBFGS} ]; then ngram -lm - -order 4 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1 else - echo "Skipping MaxEnt models" + echo >&2 "SRILM is not compiled with the support of MaxEnt models." + echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" + echo >&2 "which will take care of compiling the SRILM with MaxEnt support" + exit 1; fi From 196cb77a0b0ab9ccb4ab6a3ebe78471ef8b24a3d Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Fri, 9 Mar 2018 10:54:52 -0500 Subject: [PATCH 05/10] [egs] fixed chain related path issues and reflected Dan's comments --- egs/chime5/s5/local/chain/run_tdnn.sh | 2 +- .../chain/tuning/{run_tdnn_1e.sh => run_tdnn_1a.sh} | 10 ++++------ egs/chime5/s5/local/nnet3/run_ivector_common.sh | 2 +- egs/chime5/s5/run.sh | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) rename egs/chime5/s5/local/chain/tuning/{run_tdnn_1e.sh => run_tdnn_1a.sh} (97%) diff --git a/egs/chime5/s5/local/chain/run_tdnn.sh b/egs/chime5/s5/local/chain/run_tdnn.sh index 75da1a0a553..34499362831 120000 --- a/egs/chime5/s5/local/chain/run_tdnn.sh +++ b/egs/chime5/s5/local/chain/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_1e.sh \ No newline at end of file +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1e.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh similarity index 97% rename from egs/chime5/s5/local/chain/tuning/run_tdnn_1e.sh rename to egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index ba8779bcc77..7c599e9ee8a 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -1,8 +1,5 @@ #!/bin/bash -# 1e is as 1d but instead of the --proportional-shrink option, using -# the newly added xconfig-layer-specific 'l2-regularize' options. 
- # Set -e here so that we catch if any executable fails immediately set -euo pipefail @@ -11,10 +8,10 @@ set -euo pipefail stage=0 nj=96 train_set=train_worn_u100k -test_sets="dev_worn eval_worn dev_beamformit_ref eval_beamformit_ref" +test_sets="dev_worn dev_beamformit_ref" gmm=tri3 nnet3_affix=_train_worn_u100k -lm_suffix=_chime5_tg +lm_suffix= # The rest are configs specific to this script. Most of the parameters # are just hardcoded at this level, in the commands below. @@ -62,6 +59,7 @@ fi # run those things. local/nnet3/run_ivector_common.sh --stage $stage \ --train-set $train_set \ + --test-sets $test_sets \ --gmm $gmm \ --nnet3-affix "$nnet3_affix" || exit 1; @@ -137,7 +135,7 @@ if [ $stage -le 13 ]; then num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) opts="l2-regularize=0.05" - output_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.01 bottleneck-dim=320" mkdir -p $dir/configs cat < $dir/configs/network.xconfig diff --git a/egs/chime5/s5/local/nnet3/run_ivector_common.sh b/egs/chime5/s5/local/nnet3/run_ivector_common.sh index 58f29f479bc..e28e5ce996d 100755 --- a/egs/chime5/s5/local/nnet3/run_ivector_common.sh +++ b/egs/chime5/s5/local/nnet3/run_ivector_common.sh @@ -10,7 +10,7 @@ set -euo pipefail stage=0 train_set=train_worn_u100k -test_sets="dev_worn eval_worn dev_beamformit_ref eval_beamformit_ref" +test_sets="dev_worn dev_beamformit_ref" gmm=tri3 nj=96 diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index 11824af2c31..a2c16a98bd3 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -31,7 +31,7 @@ audio_dir=${chime5_corpus}/audio train_set=train_worn_u100k test_sets="dev_worn dev_${enhancement}_ref" # use the below once you obtain the evaluation data. 
Also remove the comment #eval# in the lines below -#eval#test_sets="dev_worn eval_worn dev_${enhancement}_ref eval_${enhancement}_ref" +#eval#test_sets="dev_worn dev_${enhancement}_ref eval_${enhancement}_ref" # This script also needs the phonetisaurus g2p, srilm, beamformit ./local/check_tools.sh || exit 1 @@ -234,5 +234,5 @@ fi if [ $stage -le 18 ]; then # chain TDNN - local/chain/run_tdnn.sh --nj ${nj} --test_sets "$test_sets" + local/chain/run_tdnn.sh --nj ${nj} --train_set ${train_set}_cleaned --test_sets "$test_sets" --gmm tri3_cleaned --nnet3_affix _${train_set}_cleaned fi From 87970657763b5c7340300e209fdfa405340afe80 Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Fri, 9 Mar 2018 17:30:24 -0500 Subject: [PATCH 06/10] [egs] added location tags for future scoring. also changed the left and right channel information according to the other channel information format --- egs/chime5/s5/local/json2text.py | 14 ++++++++++---- egs/chime5/s5/local/prepare_data.sh | 25 ++++++++++++++----------- egs/chime5/s5/run.sh | 5 +++-- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/egs/chime5/s5/local/json2text.py b/egs/chime5/s5/local/json2text.py index a3b81fd7067..4df0160efb6 100755 --- a/egs/chime5/s5/local/json2text.py +++ b/egs/chime5/s5/local/json2text.py @@ -50,6 +50,12 @@ def hms_to_seconds(hms): else: mictype = args.mictype.upper() # convert from u01 to U01 + # add location tag for scoring (only for dev and eval sets) + if 'location' in x.keys(): + location = x['location'].upper() + else: + location = 'NOLOCATION' + start_time = x['start_time'][mictype] end_time = x['end_time'][mictype] @@ -69,10 +75,10 @@ def hms_to_seconds(hms): start_time = hms_to_seconds(start_time) end_time = hms_to_seconds(end_time) - if args.mictype == 'worn': - uttid = speaker_id + '_' + session_id + '-' + start_time + '-' + end_time - else: - uttid = speaker_id + '_' + session_id + '_' + mictype + '-' + start_time + '-' + end_time + uttid = speaker_id + '_' + session_id + 
if not args.mictype == 'worn': + uttid += '_' + mictype + uttid += '_' + location + '-' + start_time + '-' + end_time if end_time > start_time: sys.stdout.buffer.write((uttid + ' ' + words + '\n').encode("utf-8")) diff --git a/egs/chime5/s5/local/prepare_data.sh b/egs/chime5/s5/local/prepare_data.sh index af37b0841fa..a037f371e34 100755 --- a/egs/chime5/s5/local/prepare_data.sh +++ b/egs/chime5/s5/local/prepare_data.sh @@ -54,7 +54,7 @@ echo "$0: Creating datadir $dir for type=\"$mictype\"" if [ $mictype == "worn" ]; then # convert the filenames to wav.scp format, use the basename of the file - # as a the wav.scp key, add _L and _R for left and right channel + # as a the wav.scp key, add .L and .R for left and right channel # i.e. each file will have two entries (left and right channel) find $adir -name "S[0-9]*_P[0-9]*.wav" | \ perl -ne '{ @@ -64,17 +64,17 @@ if [ $mictype == "worn" ]; then @F = split "/", $path; ($f = $F[@F-1]) =~ s/.wav//; @F = split "_", $f; - print "${F[1]}_${F[0]}_L sox $path -t wav - remix 1 |\n"; - print "${F[1]}_${F[0]}_R sox $path -t wav - remix 2 |\n"; + print "${F[1]}_${F[0]}.L sox $path -t wav - remix 1 |\n"; + print "${F[1]}_${F[0]}.R sox $path -t wav - remix 2 |\n"; }' | sort > $dir/wav.scp # generate the transcripts for both left and right channel # from the original transcript in the form # P09_S03-0006072-0006147 gimme the baker # create left and right channel transcript - # P09_S03_L-0006072-0006147 gimme the baker - # P09_S03_R-0006072-0006147 gimme the baker - sed -n 's/ *$//; h; s/-/_L-/p; g; s/-/_R-/p' $dir/text.orig | sort > $dir/text + # P09_S03.L-0006072-0006147 gimme the baker + # P09_S03.R-0006072-0006147 gimme the baker + sed -n 's/ *$//; h; s/-/\.L-/p; g; s/-/\.R-/p' $dir/text.orig | sort > $dir/text elif [ $mictype == "ref" ]; then # fixed reference array @@ -98,10 +98,10 @@ else # convert the transcripts from # P09_S03-0006072-0006147 gimme the baker # to the per-channel transcripts - # 
P09_S03_U01.CH1-0006072-0006147 gimme the baker - # P09_S03_U01.CH2-0006072-0006147 gimme the baker - # P09_S03_U01.CH3-0006072-0006147 gimme the baker - # P09_S03_U01.CH4-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH1-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH2-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH3-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH4-0006072-0006147 gimme the baker perl -ne '$l=$_; for($i=1; $i<=4; $i++) { ($x=$l)=~ s/-/.CH\Q$i\E-/; @@ -113,15 +113,18 @@ $cleanup && rm -f $dir/text.* $dir/wav.scp.* $dir/wav.flist # Prepare 'segments', 'utt2spk', 'spk2utt' if [ $mictype == "worn" ]; then cut -d" " -f 1 $dir/text | \ - awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" \ > $dir/segments elif [ $mictype == "ref" ]; then cut -d" " -f 1 $dir/text | \ awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" |\ sed -e "s/ P.._/ /" > $dir/segments else cut -d" " -f 1 $dir/text | \ awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" |\ sed -e 's/ P.._/ /' > $dir/segments fi cut -f 1 -d ' ' $dir/segments | \ diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index a2c16a98bd3..5bb037efd9a 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -95,13 +95,14 @@ fi if [ $stage -le 5 ]; then # remove possibly bad sessions (P11_S03, P52_S19, P53_S24, P54_S24) + # see http://spandh.dcs.shef.ac.uk/chime_challenge/data.html for more details utils/copy_data_dir.sh data/train_worn data/train_worn_org # back up grep -v -e "^P11_S03" -e "^P52_S19" -e "^P53_S24" -e "^P54_S24" data/train_worn_org/text > data/train_worn/text utils/fix_data_dir.sh data/train_worn # combine mix array and worn mics # randomly extract first 100k utterances from all mics - # If you 
want to include more training data, you can increase the number of array mic utterances + # if you want to include more training data, you can increase the number of array mic utterances utils/combine_data.sh data/train_uall data/train_u01 data/train_u02 data/train_u04 data/train_u05 data/train_u06 utils/subset_data_dir.sh data/train_uall 100000 data/train_u100k utils/combine_data.sh data/${train_set} data/train_worn data/train_u100k @@ -111,7 +112,7 @@ if [ $stage -le 5 ]; then #eval#for dset in train dev eval; do for dset in train dev; do utils/copy_data_dir.sh data/${dset}_worn data/${dset}_worn_stereo - grep "_L-" data/${dset}_worn_stereo/text > data/${dset}_worn/text + grep "\.L-" data/${dset}_worn_stereo/text > data/${dset}_worn/text utils/fix_data_dir.sh data/${dset}_worn done fi From d5d93fead3b949a16009f749e5a10be524a67f9f Mon Sep 17 00:00:00 2001 From: Jan Trmal Date: Sat, 10 Mar 2018 02:39:10 -0500 Subject: [PATCH 07/10] forward the test set names to ivector_common --- egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh | 1 + egs/chime5/s5/run.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index 7c599e9ee8a..cb7cea9310d 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -58,6 +58,7 @@ fi # nnet3 setup, and you can skip them by setting "--stage 11" if you have already # run those things. 
local/nnet3/run_ivector_common.sh --stage $stage \ + --test-sets "$test_sets" \ --train-set $train_set \ --test-sets $test_sets \ --gmm $gmm \ diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index 5bb037efd9a..9a57289d592 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -235,5 +235,5 @@ fi if [ $stage -le 18 ]; then # chain TDNN - local/chain/run_tdnn.sh --nj ${nj} --train_set ${train_set}_cleaned --test_sets "$test_sets" --gmm tri3_cleaned --nnet3_affix _${train_set}_cleaned + local/chain/run_tdnn.sh --nj ${nj} --train-set ${train_set}_cleaned --test-sets "$test_sets" --gmm tri3_cleaned --nnet3-affix _${train_set}_cleaned fi From ffbe47eea3bb5bf7ccdbe151cdf129351435cba1 Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Sat, 10 Mar 2018 18:21:08 -0500 Subject: [PATCH 08/10] [egs] removed lexicon update --- .../s5/local/chain/tuning/run_tdnn_1a.sh | 4 +- egs/chime5/s5/local/prepare_dict.sh | 2 +- egs/chime5/s5/run.sh | 53 +++++-------------- 3 files changed, 17 insertions(+), 42 deletions(-) diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index 7c599e9ee8a..cb063420593 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -15,7 +15,7 @@ lm_suffix= # The rest are configs specific to this script. Most of the parameters # are just hardcoded at this level, in the commands below. -affix=1e # affix for the TDNN directory name +affix=1a # affix for the TDNN directory name tree_affix= train_stage=-10 get_egs_stage=-10 @@ -59,7 +59,7 @@ fi # run those things. 
local/nnet3/run_ivector_common.sh --stage $stage \ --train-set $train_set \ - --test-sets $test_sets \ + --test-sets "$test_sets" \ --gmm $gmm \ --nnet3-affix "$nnet3_affix" || exit 1; diff --git a/egs/chime5/s5/local/prepare_dict.sh b/egs/chime5/s5/local/prepare_dict.sh index e2a16b92f7d..09083d0e795 100755 --- a/egs/chime5/s5/local/prepare_dict.sh +++ b/egs/chime5/s5/local/prepare_dict.sh @@ -24,7 +24,7 @@ set -o nounset # Treat unset variables as an error # check existing directories [ $# != 0 ] && echo "Usage: $0" && exit 1; -dir=data/local/dict_nosp +dir=data/local/dict mkdir -p $dir echo "$0: Getting CMU dictionary" diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index 5bb037efd9a..b586231f7e6 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -56,11 +56,11 @@ if [ $stage -le 2 ]; then local/prepare_dict.sh utils/prepare_lang.sh \ - data/local/dict_nosp "" data/local/lang_nosp data/lang_nosp + data/local/dict "" data/local/lang data/lang local/train_lms_srilm.sh \ --train-text data/train_worn/text --dev-text data/dev_worn/text \ - --oov-symbol "" --words-file data/lang_nosp/words.txt \ + --oov-symbol "" --words-file data/lang/words.txt \ data/ data/srilm fi @@ -68,7 +68,7 @@ LM=data/srilm/best_3gram.gz if [ $stage -le 3 ]; then # Compiles G for chime5 trigram LM utils/format_lm.sh \ - data/lang_nosp $LM data/local/dict_nosp/lexicon.txt data/lang_nosp_test + data/lang $LM data/local/dict/lexicon.txt data/lang fi @@ -148,59 +148,34 @@ fi if [ $stage -le 9 ]; then # Starting basic training on MFCC features steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ - data/${train_set}_30kshort data/lang_nosp exp/mono + data/${train_set}_30kshort data/lang exp/mono fi if [ $stage -le 10 ]; then steps/align_si.sh --nj $nj --cmd "$train_cmd" \ - data/${train_set} data/lang_nosp exp/mono exp/mono_ali + data/${train_set} data/lang exp/mono exp/mono_ali steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 30000 data/${train_set} data/lang_nosp exp/mono_ali 
exp/tri1 + 2500 30000 data/${train_set} data/lang exp/mono_ali exp/tri1 fi if [ $stage -le 11 ]; then steps/align_si.sh --nj $nj --cmd "$train_cmd" \ - data/${train_set} data/lang_nosp exp/tri1 exp/tri1_ali + data/${train_set} data/lang exp/tri1 exp/tri1_ali steps/train_lda_mllt.sh --cmd "$train_cmd" \ - 4000 50000 data/${train_set} data/lang_nosp exp/tri1_ali exp/tri2 + 4000 50000 data/${train_set} data/lang exp/tri1_ali exp/tri2 fi if [ $stage -le 12 ]; then - utils/mkgraph.sh data/lang_nosp_test exp/tri2 exp/tri2/graph_nosp + utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph for dset in ${test_sets}; do steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ - exp/tri2/graph_nosp data/${dset} exp/tri2/decode_${dset}_nosp + exp/tri2/graph data/${dset} exp/tri2/decode_${dset} done fi if [ $stage -le 13 ]; then - # create a more refined lexicon (include pronunciation probabilities) - steps/get_prons.sh --cmd "$train_cmd" \ - data/${train_set} data/lang_nosp exp/tri2 - - utils/dict_dir_add_pronprobs.sh --max-normalize true \ - data/local/dict_nosp exp/tri2/pron_counts_nowb.txt \ - exp/tri2/sil_counts_nowb.txt \ - exp/tri2/pron_bigram_counts_nowb.txt data/local/dict - - # add explicit phone loop for model - utils/lang/make_unk_lm.sh --use-pocolm false \ - data/local/dict exp/make_unk - - # and compile the lang directory - utils/prepare_lang.sh \ - --unk-fst exp/make_unk/unk_fst.txt \ - --phone-symbol-table data/lang_nosp/phones.txt \ - data/local/dict "" data/local/lang data/lang - - # and convert the LM in arpa to G.fst - utils/format_lm.sh \ - data/lang $LM data/local/dict/lexicon.txt data/lang -fi - -if [ $stage -le 14 ]; then utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph for dset in ${test_sets}; do steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ @@ -209,7 +184,7 @@ if [ $stage -le 14 ]; then wait fi -if [ $stage -le 15 ]; then +if [ $stage -le 14 ]; then steps/align_si.sh --nj $nj --cmd "$train_cmd" \ data/${train_set} 
data/lang exp/tri2 exp/tri2_ali @@ -217,7 +192,7 @@ if [ $stage -le 15 ]; then 5000 100000 data/${train_set} data/lang exp/tri2_ali exp/tri3 fi -if [ $stage -le 16 ]; then +if [ $stage -le 15 ]; then utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph for dset in ${test_sets}; do steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ @@ -226,14 +201,14 @@ if [ $stage -le 16 ]; then wait fi -if [ $stage -le 17 ]; then +if [ $stage -le 16 ]; then # The following script cleans the data and produces cleaned data steps/cleanup/clean_and_segment_data.sh --nj ${nj} --cmd "$train_cmd" \ --segmentation-opts "--min-segment-length 0.3 --min-new-segment-length 0.6" \ data/${train_set} data/lang exp/tri3 exp/tri3_cleaned data/${train_set}_cleaned fi -if [ $stage -le 18 ]; then +if [ $stage -le 17 ]; then # chain TDNN local/chain/run_tdnn.sh --nj ${nj} --train_set ${train_set}_cleaned --test_sets "$test_sets" --gmm tri3_cleaned --nnet3_affix _${train_set}_cleaned fi From 9b936cacf38abd404faca276b9d4d2266871b2c3 Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Sat, 10 Mar 2018 18:25:03 -0500 Subject: [PATCH 09/10] [egs] merge Yenda's update --- egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index 8a6106292d5..cb063420593 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -58,7 +58,6 @@ fi # nnet3 setup, and you can skip them by setting "--stage 11" if you have already # run those things. 
local/nnet3/run_ivector_common.sh --stage $stage \ - --test-sets "$test_sets" \ --train-set $train_set \ --test-sets "$test_sets" \ --gmm $gmm \ From bc2f8f62cc0f3babdb13e71c0f6ef283e2612434 Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Tue, 13 Mar 2018 09:14:11 -0400 Subject: [PATCH 10/10] [egs] added RESULTS --- egs/chime5/s5/RESULTS | 15 +++++++++------ egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/egs/chime5/s5/RESULTS b/egs/chime5/s5/RESULTS index b57787a0798..941b63ece52 100644 --- a/egs/chime5/s5/RESULTS +++ b/egs/chime5/s5/RESULTS @@ -1,10 +1,13 @@ # tri2 -%WER 92.26 [ 60741 / 65835, 3212 ins, 35241 del, 22288 sub ] exp/tri2/decode_dev_beamformit_ref/wer_16_1.0 -%WER 76.47 [ 50342 / 65835, 4356 ins, 19004 del, 26982 sub ] exp/tri2/decode_dev_worn/wer_14_1.0 +%WER 76.40 [ 44985 / 58881, 3496 ins, 17652 del, 23837 sub ] exp/tri2/decode_dev_worn/wer_13_1.0 +%WER 93.56 [ 55091 / 58881, 2132 ins, 35555 del, 17404 sub ] exp/tri2/decode_dev_beamformit_ref/wer_17_1.0 # tri3 -%WER 92.43 [ 60852 / 65835, 3149 ins, 35536 del, 22167 sub ] exp/tri3/decode_dev_beamformit_ref.si/wer_17_1.0 -%WER 90.80 [ 59779 / 65835, 4742 ins, 27968 del, 27069 sub ] exp/tri3/decode_dev_beamformit_ref/wer_17_1.0 -%WER 76.38 [ 50283 / 65835, 3911 ins, 19081 del, 27291 sub ] exp/tri3/decode_dev_worn.si/wer_17_1.0 -%WER 73.13 [ 48146 / 65835, 4727 ins, 17274 del, 26145 sub ] exp/tri3/decode_dev_worn/wer_16_1.0 +%WER 72.81 [ 42869 / 58881, 3629 ins, 15998 del, 23242 sub ] exp/tri3/decode_dev_worn/wer_15_1.0 +%WER 91.73 [ 54013 / 58881, 3519 ins, 27098 del, 23396 sub ] exp/tri3/decode_dev_beamformit_ref/wer_17_1.0 + +# nnet3 tdnn+chain +%WER 47.91 [ 28212 / 58881, 2843 ins, 8957 del, 16412 sub ] exp/chain_train_worn_u100k_cleaned/tdnn1a_sp/decode_dev_worn/wer_9_0.0 +%WER 81.28 [ 47859 / 58881, 4210 ins, 27511 del, 16138 sub ] exp/chain_train_worn_u100k_cleaned/tdnn1a_sp/decode_dev_beamformit_ref/wer_9_0.5 + diff 
--git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index cb063420593..45a7fd84bd6 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -36,7 +36,7 @@ remove_egs=true reporting_email= #decode options -test_online_decoding=true # if true, it will run the last decoding stage. +test_online_decoding=false # if true, it will run the last decoding stage. # End configuration section.