diff --git a/.gitignore b/.gitignore index 5764bfe22c6..9f219d458a4 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,8 @@ GSYMS /egs/*/*/plp /egs/*/*/exp /egs/*/*/data +/egs/*/*/wav +/egs/*/*/enhan # /tools/ /tools/pocolm/ diff --git a/egs/chime5/s5b/local/nnet3/compare_wer.sh b/egs/chime5/s5b/local/nnet3/compare_wer.sh old mode 100755 new mode 100644 index 095e85cc338..fa627acd27b --- a/egs/chime5/s5b/local/nnet3/compare_wer.sh +++ b/egs/chime5/s5b/local/nnet3/compare_wer.sh @@ -130,3 +130,4 @@ done echo echo + diff --git a/egs/chime5/s5b/local/nnet3/decode.sh b/egs/chime5/s5b/local/nnet3/decode.sh index 7af09f36a13..8fa54e0d4a6 100755 --- a/egs/chime5/s5b/local/nnet3/decode.sh +++ b/egs/chime5/s5b/local/nnet3/decode.sh @@ -35,6 +35,8 @@ post_decode_acwt=1.0 # important to change this when using chain models extra_left_context_initial=0 extra_right_context_final=0 +graph_affix= + score_opts="--min-lmwt 6 --max-lmwt 13" . ./cmd.sh @@ -94,7 +96,7 @@ if [ $stage -le 2 ]; then fi fi -decode_dir=$dir/decode_${data_set}${affix} +decode_dir=$dir/decode${graph_affix}_${data_set}${affix} # generate the lattices if [ $stage -le 3 ]; then echo "Generating lattices, stage 1" diff --git a/egs/chime5/s5b/local/run_recog.sh b/egs/chime5/s5b/local/run_recog.sh index 5c74c9ff242..989a5f95d01 100755 --- a/egs/chime5/s5b/local/run_recog.sh +++ b/egs/chime5/s5b/local/run_recog.sh @@ -28,8 +28,8 @@ json_dir=${chime5_corpus}/transcriptions audio_dir=${chime5_corpus}/audio # training and test data -train_set=train_worn_u100k -test_sets="eval_${enhancement}_ref" +train_set=train_worn_simu_u400k +test_sets="eval_${enhancement}_dereverb_ref" # This script also needs the phonetisaurus g2p, srilm, beamformit ./local/check_tools.sh || exit 1 @@ -38,18 +38,27 @@ if [ $stage -le 4 ]; then # Beamforming using reference arrays # enhanced WAV directory enhandir=enhan + dereverb_dir=${PWD}/wav/wpe/ for dset in eval; do for mictype in u01 u02 u03 u04 u05 u06; do - local/run_beamformit.sh --cmd 
"$train_cmd" \ + local/run_wpe.sh --nj 4 --cmd "$train_cmd --mem 120G" \ ${audio_dir}/${dset} \ + ${dereverb_dir}/${dset} \ + ${mictype} + done + done + for dset in dev eval; do + for mictype in u01 u02 u03 u04 u05 u06; do + local/run_beamformit.sh --cmd "$train_cmd" \ + ${dereverb_dir}/${dset} \ ${enhandir}/${dset}_${enhancement}_${mictype} \ ${mictype} done done - + for dset in eval; do local/prepare_data.sh --mictype ref "$PWD/${enhandir}/${dset}_${enhancement}_u0*" \ - ${json_dir}/${dset} data/${dset}_${enhancement}_ref + ${json_dir}/${dset} data/${dset}_${enhancement}_dereverb_ref done fi @@ -92,28 +101,13 @@ if [ $stage -le 7 ]; then done fi -if [ $stage -le 17 ]; then - nnet3_affix=_${train_set}_cleaned - for datadir in ${test_sets}; do - utils/copy_data_dir.sh data/$datadir data/${datadir}_hires - done - for datadir in ${test_sets}; do - steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf \ - --cmd "$train_cmd" data/${datadir}_hires || exit 1; - steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; - utils/fix_data_dir.sh data/${datadir}_hires || exit 1; - done - for data in $test_sets; do - steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \ - data/${data}_hires exp/nnet3${nnet3_affix}/extractor \ - exp/nnet3${nnet3_affix}/ivectors_${data}_hires - done -fi +nnet3_affix=_${train_set}_cleaned_rvb + +lm_suffix= if [ $stage -le 18 ]; then # First the options that are passed through to run_ivector_common.sh # (some of which are also used in this script directly). - lm_suffix= # The rest are configs specific to this script. Most of the parameters # are just hardcoded at this level, in the commands below. 
@@ -138,16 +132,14 @@ if [ $stage -le 18 ]; then for data in $test_sets; do ( - steps/nnet3/decode.sh \ - --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj 8 --cmd "$decode_cmd" --num-threads 4 \ - --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ - $tree_dir/graph${lm_suffix} data/${data}_hires ${dir}/decode${lm_suffix}_${data} || exit 1 + local/nnet3/decode.sh --affix 2stage --pass2-decode-opts "--min-active 1000" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk 150 --nj $decode_nj \ + --ivector-dir exp/nnet3${nnet3_affix} \ + --graph-affix ${lm_suffix} \ + data/${data} data/lang${lm_suffix} \ + $tree_dir/graph${lm_suffix} \ + exp/chain${nnet3_affix}/tdnn1b_sp ) || touch $dir/.error & done wait @@ -159,6 +151,6 @@ if [ $stage -le 20 ]; then # please specify both dev and eval set directories so that the search parameters # (insertion penalty and language model weight) will be tuned using the dev set local/score_for_submit.sh \ - --dev exp/chain_${train_set}_cleaned/tdnn1a_sp/decode_dev_${enhancement}_ref \ - --eval exp/chain_${train_set}_cleaned/tdnn1a_sp/decode_eval_${enhancement}_ref + --dev exp/chain${nnet3_affix}/tdnn1b_sp/decode${lm_suffix}_dev_${enhancement}_dereverb_ref_2stage \ + --eval exp/chain${nnet3_affix}/tdnn1b_sp/decode${lm_suffix}_eval_${enhancement}_dereverb_ref_2stage fi diff --git a/egs/chime5/s5b/local/run_wpe.sh b/egs/chime5/s5b/local/run_wpe.sh index 1c4b1c80291..ed512e69aae 100755 --- a/egs/chime5/s5b/local/run_wpe.sh +++ b/egs/chime5/s5b/local/run_wpe.sh @@ -33,7 +33,8 @@ set -o pipefail miniconda_dir=$HOME/miniconda3/ if [ ! -d $miniconda_dir ]; then - echo "$miniconda_dir does not exist. 
Please run '../../../tools/extras/install_miniconda.sh' and '../../../tools/extras/install_wpe.sh';" + echo "$miniconda_dir does not exist. Please run '$KALDI_ROOT/tools/extras/install_miniconda.sh'." + exit 1 fi # check if WPE is installed diff --git a/egs/chime6/README.txt b/egs/chime6/README.txt new file mode 100644 index 00000000000..9fb48c26822 --- /dev/null +++ b/egs/chime6/README.txt @@ -0,0 +1,6 @@ +This is a kaldi recipe for the 6th CHiME Speech Separation and Recognition Challenge (CHiME-6). + +See http://spandh.dcs.shef.ac.uk/chime_challenge/ for more detailed information. + +s5_track1 : Track 1 of the challenge (oracle segments and speaker label is provided) +s5_track2 : Track 2 of the challenge (only raw audio is provided) diff --git a/egs/chime6/s5_track1/RESULTS b/egs/chime6/s5_track1/RESULTS new file mode 100644 index 00000000000..73b47ddf3cc --- /dev/null +++ b/egs/chime6/s5_track1/RESULTS @@ -0,0 +1,21 @@ + +# tri2 +%WER 88.52 [ 52121 / 58881, 2023 ins, 30285 del, 19813 sub ] exp/tri2/decode_dev_gss/wer_17_0.5 + +# tri3 +%WER 85.72 [ 50471 / 58881, 3079 ins, 23787 del, 23605 sub ] exp/tri3/decode_dev_gss/wer_17_0.5 + +# nnet3 tdnn+chain +%WER 41.21 [ 24267 / 58881, 2428 ins, 7606 del, 14233 sub ] exp/chain_train_worn_simu_u400k_cleaned_rvb/tdnn1b_sp/decode_dev_worn_2stage/wer_11_0.0 +%WER 51.76 [ 30474 / 58881, 2665 ins, 11749 del, 16060 sub ] exp/chain_train_worn_simu_u400k_cleaned_rvb/tdnn1b_sp/decode_dev_gss_multiarray_2stage/wer_10_0.0 + +# result with the challenge submission format (Nov 17, 2019) +# after the fix of speaker ID across arrays +==== development set ==== +session S02 room DINING: #words 8288, #errors 4459, wer 53.80 % +session S02 room KITCHEN: #words 12696, #errors 7170, wer 56.47 % +session S02 room LIVING: #words 15460, #errors 7388, wer 47.78 % +session S09 room DINING: #words 5766, #errors 3100, wer 53.76 % +session S09 room KITCHEN: #words 8911, #errors 4483, wer 50.30 % +session S09 room LIVING: #words 7760, #errors 3874, 
wer 49.92 % +overall: #words 58881, #errors 30474, wer 51.75 % diff --git a/egs/chime6/s5_track1/cmd.sh b/egs/chime6/s5_track1/cmd.sh new file mode 100644 index 00000000000..9702501f1a7 --- /dev/null +++ b/egs/chime6/s5_track1/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
+ +export train_cmd="retry.pl queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" + diff --git a/egs/chime6/s5_track1/conf/beamformit.cfg b/egs/chime6/s5_track1/conf/beamformit.cfg new file mode 100755 index 00000000000..70fdd858651 --- /dev/null +++ b/egs/chime6/s5_track1/conf/beamformit.cfg @@ -0,0 +1,50 @@ +#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/) + +# scrolling size to compute the delays +scroll_size = 250 + +# cross correlation computation window size +window_size = 500 + +#amount of maximum points for the xcorrelation taken into account +nbest_amount = 4 + +#flag wether to apply an automatic noise thresholding +do_noise_threshold = 1 + +#Percentage of frames with lower xcorr taken as noisy +noise_percent = 10 + +######## acoustic modelling parameters + +#transition probabilities weight for multichannel decoding +trans_weight_multi = 25 +trans_weight_nbest = 25 + +### + +#flag wether to print the feaures after setting them, or not +print_features = 1 + +#flag wether to use the bad frames in the sum process +do_avoid_bad_frames = 1 + +#flag to use the best channel (SNR) as a reference +#defined from command line +do_compute_reference = 1 + +#flag wether to use a uem file or not(process all the file) +do_use_uem_file = 0 + +#flag wether to use an adaptative weights scheme or fixed weights +do_adapt_weights = 1 + +#flag wether to output the sph files or just run the system to create the auxiliary files +do_write_sph_files = 1 + +####directories where to store/retrieve info#### +#channels_file = ./cfg-files/channels + +#show needs to be passed as argument normally, here a default one is given just in case +#show_id = Ttmp + diff --git a/egs/chime6/s5_track1/conf/mfcc.conf b/egs/chime6/s5_track1/conf/mfcc.conf new file mode 100644 index 00000000000..32988403b00 --- /dev/null +++ b/egs/chime6/s5_track1/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false +--sample-frequency=16000 diff --git 
a/egs/chime6/s5_track1/conf/mfcc_hires.conf b/egs/chime6/s5_track1/conf/mfcc_hires.conf new file mode 100644 index 00000000000..fd64b62eb16 --- /dev/null +++ b/egs/chime6/s5_track1/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=16000 +--num-mel-bins=40 +--num-ceps=40 +--low-freq=40 +--high-freq=-400 diff --git a/egs/chime6/s5_track1/conf/online_cmvn.conf b/egs/chime6/s5_track1/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/chime6/s5_track1/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/chime6/s5_track1/conf/queue.conf b/egs/chime6/s5_track1/conf/queue.conf new file mode 100644 index 00000000000..73103195684 --- /dev/null +++ b/egs/chime6/s5_track1/conf/queue.conf @@ -0,0 +1,10 @@ +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l hostname='!b19*' +option gpu=* -l gpu=$0 -q g.q -l hostname='!b19*' + diff --git a/egs/chime6/s5_track1/local/add_location_to_uttid.sh b/egs/chime6/s5_track1/local/add_location_to_uttid.sh new file mode 100755 index 00000000000..91bd0c0dd37 --- /dev/null +++ b/egs/chime6/s5_track1/local/add_location_to_uttid.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Author: Ashish Arora +# Apache 2.0 + +. ./cmd.sh +. ./path.sh + +enhancement=gss +. 
utils/parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: local/add_location_to_uttid.sh [options] " + echo " " + echo "main options (for others, see top of script file)" + echo " --enhancement # enhancement type (gss or beamformit)" + exit 1; +fi + +jdir=$1 +puttdir=$2 +utt_loc_file=$3 + +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +if [[ ${enhancement} == *gss* ]]; then + local/get_location.py $jdir > $utt_loc_file + local/replace_uttid.py $utt_loc_file $puttdir/per_utt > $puttdir/per_utt_loc +fi + +if [[ ${enhancement} == *beamformit* ]]; then + cat $puttdir/per_utt > $puttdir/per_utt_loc +fi diff --git a/egs/chime6/s5_track1/local/chain/compare_wer.sh b/egs/chime6/s5_track1/local/chain/compare_wer.sh new file mode 100755 index 00000000000..cd6be14ed88 --- /dev/null +++ b/egs/chime6/s5_track1/local/chain/compare_wer.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/chime6/s5_track1/local/chain/run_tdnn.sh b/egs/chime6/s5_track1/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..61f8f499182 --- /dev/null +++ b/egs/chime6/s5_track1/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1b.sh \ No newline at end of file diff --git a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..daad37e2cd7 --- /dev/null +++ b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,270 @@ +#!/bin/bash + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=96 +train_set=train_worn_u100k +test_sets="dev_worn dev_beamformit_ref" +gmm=tri3 +nnet3_affix=_train_worn_u100k +lm_suffix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. 
+affix=1a # affix for the TDNN directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=140,100,160 +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj ${nj} --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + opts="l2-regularize=0.05" + output_opts="l2-regularize=0.01 bottleneck-dim=320" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=512 + relu-batchnorm-layer name=tdnn2 $opts dim=512 input=Append(-1,0,1) + relu-batchnorm-layer name=tdnn3 $opts dim=512 + relu-batchnorm-layer name=tdnn4 $opts dim=512 input=Append(-1,0,1) + relu-batchnorm-layer name=tdnn5 $opts dim=512 + relu-batchnorm-layer name=tdnn6 $opts dim=512 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn7 $opts dim=512 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn8 $opts dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain $opts dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false $output_opts dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... 
this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn8 $opts dim=512 target-rms=0.5 + output-layer name=output-xent $output_opts dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/chime5-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=10 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=256,128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + 
--egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + $tree_dir $tree_dir/graph${lm_suffix} || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj 8 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ + $tree_dir/graph${lm_suffix} data/${data}_hires ${dir}/decode${lm_suffix}_${data} || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + +# Not testing the 'looped' decoding separately, because for +# TDNN systems it would give exactly the same results as the +# normal decoding. + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. 
+ steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l 2776 combine=-0.134->-0.133 (over 3) xent:train/valid[285,428,final]=(-2.37,-1.95,-1.95/-2.19,-1.90,-1.91) logprob:train/valid[285,428,final]=(-0.201,-0.125,-0.124/-0.198,-0.147,-0.148) + +set -e + +# configs for 'chain' +stage=0 +nj=96 +train_set=train_worn_u400k +test_sets="dev_worn dev_beamformit_ref" +gmm=tri3 +nnet3_affix=_train_worn_u400k +lm_suffix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1b # affix for the TDNN directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +num_epochs=4 +common_egs_dir= +# training options +# training chunk-options +chunk_width=140,100,160 +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.5@0.50,0' + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. +skip_decoding=true +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj ${nj} --cmd "$train_cmd" --generate-ali-from-lats true \ + ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. 
+ if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $lat_dir $tree_dir +fi + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.002" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3 + linear-component name=prefinal-l dim=256 $linear_opts + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/chime5-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$train_cmd --mem 4G" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule "$dropout_schedule" \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.00025 \ + --trainer.optimization.final-effective-lrate 0.000025 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir $dir || exit 1; + +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). 
+ utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + $tree_dir $tree_dir/graph${lm_suffix} || exit 1; +fi + +if [ $stage -le 16 ] && [[ $skip_decoding == "false" ]]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj 8 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ + $tree_dir/graph${lm_suffix} data/${data}_hires ${dir}/decode${lm_suffix}_${data} || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + +exit 0; diff --git a/egs/chime6/s5_track1/local/check_tools.sh b/egs/chime6/s5_track1/local/check_tools.sh new file mode 100755 index 00000000000..8e80e25ca33 --- /dev/null +++ b/egs/chime6/s5_track1/local/check_tools.sh @@ -0,0 +1,76 @@ +#!/bin/bash -u + +# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh + +command -v uconv &>/dev/null \ + || { echo >&2 "uconv not found on PATH. You will have to install ICU4C"; exit 1; } + +command -v ngram &>/dev/null \ + || { echo >&2 "srilm not found on PATH. 
Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh to install it"; exit 1; } + +if [ -z ${LIBLBFGS} ]; then + echo >&2 "SRILM is not compiled with the support of MaxEnt models." + echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" + echo >&2 "which will take care of compiling the SRILM with MaxEnt support" + exit 1; +fi + +sox=`command -v sox 2>/dev/null` \ + || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; exit 1; } + +# If sox is found on path, check if the version is correct +if [ ! -z "$sox" ]; then + sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'` + if [[ ! $sox_version =~ v14.4.* ]]; then + echo "Unsupported sox version $sox_version found on path. You will need version v14.4.0 and higher." + exit 1 + fi +fi + +command -v phonetisaurus-align &>/dev/null \ + || { echo >&2 "Phonetisaurus not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_phonetisaurus.sh to install it"; exit 1; } + +command -v BeamformIt &>/dev/null \ + || { echo >&2 "BeamformIt not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_beamformit.sh to install it"; exit 1; } + +miniconda_dir=$HOME/miniconda3/ +if [ ! -d $miniconda_dir ]; then + echo "$miniconda_dir does not exist. Please run '../../../tools/extras/install_miniconda.sh'" +fi + +# check if WPE is installed +result=`$miniconda_dir/bin/python -c "\ +try: + import nara_wpe + print('1') +except ImportError: + print('0')"` + +if [ "$result" != "1" ]; then + echo "WPE is not installed. 
Please run ../../../tools/extras/install_wpe.sh" + exit 1 +fi + +# this is used for the audio synchronization +sox_conda=`command -v ${miniconda_dir}/bin/sox 2>/dev/null` +if [ -z "${sox_conda}" ]; then + echo "install conda sox (v14.4.2)" + ${miniconda_dir}/bin/conda install -c conda-forge sox +fi + +exit 0 diff --git a/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh b/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh new file mode 100755 index 00000000000..82839604c9e --- /dev/null +++ b/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +cmd=queue.pl +nj=40 +stage=0 +speed_perturb=true + +. ./path.sh +. utils/parse_options.sh + +if [ $# -ne 4 ]; then + echo "Usage: $0 " + exit 1 +fi + +utt_map=$1 +data=$2 +srcdir=$3 +dir=$4 + +mkdir -p $dir + +cp $srcdir/{phones.txt,tree,final.mdl} $dir || exit 1 +cp $srcdir/{final.alimdl,final.occs,splice_opts,cmvn_opts,delta_opts,final.mat,full.mat} 2>/dev/null || true + +nj_src=$(cat $srcdir/num_jobs) || exit 1 + +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj_src $dir/log/copy_lats_orig.JOB.log \ + lattice-copy "ark:gunzip -c $srcdir/lat.JOB.gz |" \ + ark,scp:$dir/lat_orig.JOB.ark,$dir/lat_orig.JOB.scp || exit 1 +fi + +for n in $(seq $nj_src); do + cat $dir/lat_orig.$n.scp +done > $dir/lat_orig.scp || exit 1 + +if $speed_perturb; then + for s in 0.9 1.1; do + awk -v s=$s '{print "sp"s"-"$1" sp"s"-"$2}' $utt_map + done | cat - $utt_map | sort -k1,1 > $dir/utt_map + utt_map=$dir/utt_map +fi + +if [ $stage -le 2 ]; then + utils/filter_scp.pl -f 2 $dir/lat_orig.scp < $utt_map | \ + utils/apply_map.pl -f 2 $dir/lat_orig.scp > \ + $dir/lat.scp || exit 1 + + if [ ! -s $dir/lat.scp ]; then + echo "$0: $dir/lat.scp is empty. Something went wrong!" 
+ exit 1 + fi +fi + +utils/split_data.sh $data $nj + +if [ $stage -le 3 ]; then + $cmd JOB=1:$nj $dir/log/copy_lats.JOB.log \ + lattice-copy "scp:utils/filter_scp.pl $data/split$nj/JOB/utt2spk $dir/lat.scp |" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1 +fi + +echo $nj > $dir/num_jobs + +if [ -f $srcdir/ali.1.gz ]; then + if [ $stage -le 4 ]; then + $cmd JOB=1:$nj_src $dir/log/copy_ali_orig.JOB.log \ + copy-int-vector "ark:gunzip -c $srcdir/ali.JOB.gz |" \ + ark,scp:$dir/ali_orig.JOB.ark,$dir/ali_orig.JOB.scp || exit 1 + fi + + for n in $(seq $nj_src); do + cat $dir/ali_orig.$n.scp + done > $dir/ali_orig.scp || exit 1 + + if [ $stage -le 5 ]; then + utils/filter_scp.pl -f 2 $dir/ali_orig.scp < $utt_map | \ + utils/apply_map.pl -f 2 $dir/ali_orig.scp > \ + $dir/ali.scp || exit 1 + + if [ ! -s $dir/ali.scp ]; then + echo "$0: $dir/ali.scp is empty. Something went wrong!" + exit 1 + fi + fi + + utils/split_data.sh $data $nj + + if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/copy_ali.JOB.log \ + copy-int-vector "scp:utils/filter_scp.pl $data/split$nj/JOB/utt2spk $dir/ali.scp |" \ + "ark:|gzip -c > $dir/ali.JOB.gz" || exit 1 + fi +fi + +rm $dir/lat_orig.*.{ark,scp} $dir/ali_orig.*.{ark,scp} 2>/dev/null || true diff --git a/egs/chime6/s5_track1/local/decode.sh b/egs/chime6/s5_track1/local/decode.sh new file mode 100755 index 00000000000..b44716ba4ac --- /dev/null +++ b/egs/chime6/s5_track1/local/decode.sh @@ -0,0 +1,253 @@ +#!/bin/bash +# +# Based mostly on the TED-LIUM and Switchboard recipe +# +# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal) +# Apache 2.0 +# +# This is a subset of run.sh to only perform recognition experiments with evaluation data +# This script can be run from run.sh or standalone.  
+# To run it standalone, you can download a pretrained chain ASR model using: +# wget http://kaldi-asr.org/models/12/0012_asr_v1.tar.gz +# Once it is downloaded, extract using: tar -xvzf 0012_asr_v1.tar.gz +# and copy the contents of the {data/ exp/} directory to your {data/ exp/} + +# Begin configuration section. +decode_nj=20 +gss_nj=50 +stage=0 +enhancement=gss # for a new enhancement method, + # change this variable and stage 4 + +# training data +train_set=train_worn_simu_u400k +# End configuration section +. ./utils/parse_options.sh + +. ./cmd.sh +. ./path.sh + + +set -e # exit on error + +# chime5 main directory path +# please change the path accordingly +chime5_corpus=/export/corpora4/CHiME5 +# chime6 data directories, which are generated from ${chime5_corpus}, +# to synchronize audio files across arrays and modify the annotation (JSON) file accordingly +chime6_corpus=${PWD}/CHiME6 +json_dir=${chime6_corpus}/transcriptions +audio_dir=${chime6_corpus}/audio + +enhanced_dir=enhanced +if [[ ${enhancement} == *gss* ]]; then + enhanced_dir=${enhanced_dir}_multiarray + enhancement=${enhancement}_multiarray +fi + +if [[ ${enhancement} == *beamformit* ]]; then + enhanced_dir=${enhanced_dir} + enhancement=${enhancement} +fi + +enhanced_dir=$(utils/make_absolute.sh $enhanced_dir) || exit 1 +test_sets="dev_${enhancement} eval_${enhancement}" + +# This script also needs the phonetisaurus g2p, srilm, beamformit +./local/check_tools.sh || exit 1 + +########################################################################### +# We first generate the synchronized audio files across arrays and +# corresponding JSON files. 
Note that this requires sox v14.4.2, +# which is installed via miniconda in ./local/check_tools.sh +########################################################################### + +if [ $stage -le 0 ]; then + local/generate_chime6_data.sh \ + --cmd "$train_cmd" \ + ${chime5_corpus} \ + ${chime6_corpus} +fi + +######################################################################################### +# In stage 1, we perform GSS based enhancement or beamformit for the test sets. multiarray = true +#can take around 10hrs for dev and eval set. +######################################################################################### + +if [ $stage -le 1 ] && [[ ${enhancement} == *gss* ]]; then + echo "$0: enhance data..." + # Guided Source Separation (GSS) from Paderborn University + # http://spandh.dcs.shef.ac.uk/chime_workshop/papers/CHiME_2018_paper_boeddecker.pdf + # @Article{PB2018CHiME5, + # author = {Boeddeker, Christoph and Heitkaemper, Jens and Schmalenstroeer, Joerg and Drude, Lukas and Heymann, Jahn and Haeb-Umbach, Reinhold}, + # title = {{Front-End Processing for the CHiME-5 Dinner Party Scenario}}, + # year = {2018}, + # booktitle = {CHiME5 Workshop}, + # } + + if [ ! -d pb_chime5/ ]; then + local/install_pb_chime5.sh + fi + + if [ ! -f pb_chime5/cache/chime6.json ]; then + ( + cd pb_chime5 + miniconda_dir=$HOME/miniconda3/ + export PATH=$miniconda_dir/bin:$PATH + export CHIME6_DIR=$chime6_corpus + make cache/chime6.json + ) + fi + + for dset in dev eval; do + local/run_gss.sh \ + --cmd "$train_cmd --max-jobs-run $gss_nj" --nj 160 \ + ${dset} \ + ${enhanced_dir} \ + ${enhanced_dir} || exit 1 + done + + for dset in dev eval; do + local/prepare_data.sh --mictype gss ${enhanced_dir}/audio/${dset} \ + ${json_dir}/${dset} data/${dset}_${enhancement} || exit 1 + done +fi + +####################################################################### +# Prepare the dev and eval data with dereverberation (WPE) and +# beamforming. 
+####################################################################### + +if [ $stage -le 1 ] && [[ ${enhancement} == *beamformit* ]]; then + # Beamforming using reference arrays + # enhanced WAV directory + enhanced_dir=enhan + dereverb_dir=${PWD}/wav/wpe/ + for dset in dev eval; do + for mictype in u01 u02 u03 u04 u05 u06; do + local/run_wpe.sh --nj 4 --cmd "$train_cmd --mem 20G" \ + ${audio_dir}/${dset} \ + ${dereverb_dir}/${dset} \ + ${mictype} + done + done + + for dset in dev eval; do + for mictype in u01 u02 u03 u04 u05 u06; do + local/run_beamformit.sh --cmd "$train_cmd" \ + ${dereverb_dir}/${dset} \ + ${enhanced_dir}/${dset}_${enhancement}_${mictype} \ + ${mictype} + done + done + + for dset in dev eval; do + local/prepare_data.sh --mictype ref "$PWD/${enhanced_dir}/${dset}_${enhancement}_u0*" \ + ${json_dir}/${dset} data/${dset}_${enhancement} + done +fi + +# In GSS enhancement, we do not have array information in utterance ID +if [ $stage -le 2 ] && [[ ${enhancement} == *gss* ]]; then + # Split speakers up into 3-minute chunks. This doesn't hurt adaptation, and + # lets us use more jobs for decoding etc. + for dset in ${test_sets}; do + utils/copy_data_dir.sh data/${dset} data/${dset}_orig + done + + for dset in ${test_sets}; do + utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_orig data/${dset} + done +fi + +if [ $stage -le 2 ] && [[ ${enhancement} == *beamformit* ]]; then + # fix speaker ID issue (thanks to Dr. Naoyuki Kanda) + # add array ID to the speaker ID to avoid the use of other array information to meet regulations + # Before this fix + # $ head -n 2 data/eval_beamformit_ref_nosplit/utt2spk + # P01_S01_U02_KITCHEN.ENH-0000192-0001278 P01 + # P01_S01_U02_KITCHEN.ENH-0001421-0001481 P01 + # After this fix + # $ head -n 2 data/eval_beamformit_ref_nosplit_fix/utt2spk + # P01_S01_U02_KITCHEN.ENH-0000192-0001278 P01_U02 + # P01_S01_U02_KITCHEN.ENH-0001421-0001481 P01_U02 + echo "$0: fix data..." 
+ for dset in ${test_sets}; do + utils/copy_data_dir.sh data/${dset} data/${dset}_nosplit + mkdir -p data/${dset}_nosplit_fix + for f in segments text wav.scp; do + if [ -f data/${dset}_nosplit/$f ]; then + cp data/${dset}_nosplit/$f data/${dset}_nosplit_fix + fi + done + awk -F "_" '{print $0 "_" $3}' data/${dset}_nosplit/utt2spk > data/${dset}_nosplit_fix/utt2spk + utils/utt2spk_to_spk2utt.pl data/${dset}_nosplit_fix/utt2spk > data/${dset}_nosplit_fix/spk2utt + done + + # Split speakers up into 3-minute chunks. This doesn't hurt adaptation, and + # lets us use more jobs for decoding etc. + for dset in ${test_sets}; do + utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_nosplit_fix data/${dset} + done +fi + +########################################################################## +# DECODING: we perform 2 stage decoding. +########################################################################## + +nnet3_affix=_${train_set}_cleaned_rvb +lm_suffix= + +if [ $stage -le 3 ]; then + # First the options that are passed through to run_ivector_common.sh + # (some of which are also used in this script directly). + + # The rest are configs specific to this script. Most of the parameters + # are just hardcoded at this level, in the commands below. + echo "$0: decode data..." + affix=1b # affix for the TDNN directory name + tree_affix= + tree_dir=exp/chain${nnet3_affix}/tree_sp${tree_affix:+_$tree_affix} + dir=exp/chain${nnet3_affix}/tdnn${affix}_sp + + # training options + # training chunk-options + chunk_width=140,100,160 + # we don't need extra left/right context for TDNN systems. 
+ chunk_left_context=0 + chunk_right_context=0 + + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + $tree_dir $tree_dir/graph${lm_suffix} || exit 1; + + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + local/nnet3/decode.sh --affix 2stage --pass2-decode-opts "--min-active 1000" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk 150 --nj $decode_nj \ + --ivector-dir exp/nnet3${nnet3_affix} \ + data/${data} data/lang${lm_suffix} \ + $tree_dir/graph${lm_suffix} \ + exp/chain${nnet3_affix}/tdnn${affix}_sp + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + +########################################################################## +# Scoring: here we obtain wer per session per location and overall WER +########################################################################## + +if [ $stage -le 4 ]; then + # final scoring to get the official challenge result + # please specify both dev and eval set directories so that the search parameters + # (insertion penalty and language model weight) will be tuned using the dev set + local/score_for_submit.sh --enhancement $enhancement --json $json_dir \ + --dev exp/chain${nnet3_affix}/tdnn1b_sp/decode${lm_suffix}_dev_${enhancement}_2stage \ + --eval exp/chain${nnet3_affix}/tdnn1b_sp/decode${lm_suffix}_eval_${enhancement}_2stage +fi diff --git a/egs/chime6/s5_track1/local/distant_audio_list b/egs/chime6/s5_track1/local/distant_audio_list new file mode 100644 index 00000000000..710945b014b --- /dev/null +++ b/egs/chime6/s5_track1/local/distant_audio_list @@ -0,0 +1,372 @@ +S03_U01.CH1 +S03_U01.CH2 +S03_U01.CH3 +S03_U01.CH4 +S03_U02.CH1 +S03_U02.CH2 +S03_U02.CH3 +S03_U02.CH4 +S03_U03.CH1 +S03_U03.CH2 +S03_U03.CH3 +S03_U03.CH4 +S03_U04.CH1 +S03_U04.CH2 +S03_U04.CH3 +S03_U04.CH4 +S03_U05.CH1 +S03_U05.CH2 +S03_U05.CH3 +S03_U05.CH4 +S03_U06.CH1 
+S03_U06.CH2 +S03_U06.CH3 +S03_U06.CH4 +S04_U01.CH1 +S04_U01.CH2 +S04_U01.CH3 +S04_U01.CH4 +S04_U02.CH1 +S04_U02.CH2 +S04_U02.CH3 +S04_U02.CH4 +S04_U03.CH1 +S04_U03.CH2 +S04_U03.CH3 +S04_U03.CH4 +S04_U04.CH1 +S04_U04.CH2 +S04_U04.CH3 +S04_U04.CH4 +S04_U05.CH1 +S04_U05.CH2 +S04_U05.CH3 +S04_U05.CH4 +S04_U06.CH1 +S04_U06.CH2 +S04_U06.CH3 +S04_U06.CH4 +S05_U01.CH1 +S05_U01.CH2 +S05_U01.CH3 +S05_U01.CH4 +S05_U02.CH1 +S05_U02.CH2 +S05_U02.CH3 +S05_U02.CH4 +S05_U05.CH1 +S05_U05.CH2 +S05_U05.CH3 +S05_U05.CH4 +S05_U06.CH1 +S05_U06.CH2 +S05_U06.CH3 +S05_U06.CH4 +S06_U01.CH1 +S06_U01.CH2 +S06_U01.CH3 +S06_U01.CH4 +S06_U02.CH1 +S06_U02.CH2 +S06_U02.CH3 +S06_U02.CH4 +S06_U03.CH1 +S06_U03.CH2 +S06_U03.CH3 +S06_U03.CH4 +S06_U04.CH1 +S06_U04.CH2 +S06_U04.CH3 +S06_U04.CH4 +S06_U05.CH1 +S06_U05.CH2 +S06_U05.CH3 +S06_U05.CH4 +S06_U06.CH1 +S06_U06.CH2 +S06_U06.CH3 +S06_U06.CH4 +S07_U01.CH1 +S07_U01.CH2 +S07_U01.CH3 +S07_U01.CH4 +S07_U02.CH1 +S07_U02.CH2 +S07_U02.CH3 +S07_U02.CH4 +S07_U03.CH1 +S07_U03.CH2 +S07_U03.CH3 +S07_U03.CH4 +S07_U04.CH1 +S07_U04.CH2 +S07_U04.CH3 +S07_U04.CH4 +S07_U05.CH1 +S07_U05.CH2 +S07_U05.CH3 +S07_U05.CH4 +S07_U06.CH1 +S07_U06.CH2 +S07_U06.CH3 +S07_U06.CH4 +S08_U01.CH1 +S08_U01.CH2 +S08_U01.CH3 +S08_U01.CH4 +S08_U02.CH1 +S08_U02.CH2 +S08_U02.CH3 +S08_U02.CH4 +S08_U03.CH1 +S08_U03.CH2 +S08_U03.CH3 +S08_U03.CH4 +S08_U04.CH1 +S08_U04.CH2 +S08_U04.CH3 +S08_U04.CH4 +S08_U05.CH1 +S08_U05.CH2 +S08_U05.CH3 +S08_U05.CH4 +S08_U06.CH1 +S08_U06.CH2 +S08_U06.CH3 +S08_U06.CH4 +S12_U01.CH1 +S12_U01.CH2 +S12_U01.CH3 +S12_U01.CH4 +S12_U02.CH1 +S12_U02.CH2 +S12_U02.CH3 +S12_U02.CH4 +S12_U03.CH1 +S12_U03.CH2 +S12_U03.CH3 +S12_U03.CH4 +S12_U04.CH1 +S12_U04.CH2 +S12_U04.CH3 +S12_U04.CH4 +S12_U05.CH1 +S12_U05.CH2 +S12_U05.CH3 +S12_U05.CH4 +S12_U06.CH1 +S12_U06.CH2 +S12_U06.CH3 +S12_U06.CH4 +S13_U01.CH1 +S13_U01.CH2 +S13_U01.CH3 +S13_U01.CH4 +S13_U02.CH1 +S13_U02.CH2 +S13_U02.CH3 +S13_U02.CH4 +S13_U03.CH1 +S13_U03.CH2 +S13_U03.CH3 +S13_U03.CH4 +S13_U04.CH1 +S13_U04.CH2 
+S13_U04.CH3 +S13_U04.CH4 +S13_U05.CH1 +S13_U05.CH2 +S13_U05.CH3 +S13_U05.CH4 +S13_U06.CH1 +S13_U06.CH2 +S13_U06.CH3 +S13_U06.CH4 +S16_U01.CH1 +S16_U01.CH2 +S16_U01.CH3 +S16_U01.CH4 +S16_U02.CH1 +S16_U02.CH2 +S16_U02.CH3 +S16_U02.CH4 +S16_U03.CH1 +S16_U03.CH2 +S16_U03.CH3 +S16_U03.CH4 +S16_U04.CH1 +S16_U04.CH2 +S16_U04.CH3 +S16_U04.CH4 +S16_U05.CH1 +S16_U05.CH2 +S16_U05.CH3 +S16_U05.CH4 +S16_U06.CH1 +S16_U06.CH2 +S16_U06.CH3 +S16_U06.CH4 +S17_U01.CH1 +S17_U01.CH2 +S17_U01.CH3 +S17_U01.CH4 +S17_U02.CH1 +S17_U02.CH2 +S17_U02.CH3 +S17_U02.CH4 +S17_U03.CH1 +S17_U03.CH2 +S17_U03.CH3 +S17_U03.CH4 +S17_U04.CH1 +S17_U04.CH2 +S17_U04.CH3 +S17_U04.CH4 +S17_U05.CH1 +S17_U05.CH2 +S17_U05.CH3 +S17_U05.CH4 +S17_U06.CH1 +S17_U06.CH2 +S17_U06.CH3 +S17_U06.CH4 +S18_U01.CH1 +S18_U01.CH2 +S18_U01.CH3 +S18_U01.CH4 +S18_U02.CH1 +S18_U02.CH2 +S18_U02.CH3 +S18_U02.CH4 +S18_U03.CH1 +S18_U03.CH2 +S18_U03.CH3 +S18_U03.CH4 +S18_U04.CH1 +S18_U04.CH2 +S18_U04.CH3 +S18_U04.CH4 +S18_U05.CH1 +S18_U05.CH2 +S18_U05.CH3 +S18_U05.CH4 +S18_U06.CH1 +S18_U06.CH2 +S18_U06.CH3 +S18_U06.CH4 +S19_U01.CH1 +S19_U01.CH2 +S19_U01.CH3 +S19_U01.CH4 +S19_U02.CH1 +S19_U02.CH2 +S19_U02.CH3 +S19_U02.CH4 +S19_U03.CH1 +S19_U03.CH2 +S19_U03.CH3 +S19_U03.CH4 +S19_U04.CH1 +S19_U04.CH2 +S19_U04.CH3 +S19_U04.CH4 +S19_U05.CH1 +S19_U05.CH2 +S19_U05.CH3 +S19_U05.CH4 +S19_U06.CH1 +S19_U06.CH2 +S19_U06.CH3 +S19_U06.CH4 +S20_U01.CH1 +S20_U01.CH2 +S20_U01.CH3 +S20_U01.CH4 +S20_U02.CH1 +S20_U02.CH2 +S20_U02.CH3 +S20_U02.CH4 +S20_U03.CH1 +S20_U03.CH2 +S20_U03.CH3 +S20_U03.CH4 +S20_U04.CH1 +S20_U04.CH2 +S20_U04.CH3 +S20_U04.CH4 +S20_U05.CH1 +S20_U05.CH2 +S20_U05.CH3 +S20_U05.CH4 +S20_U06.CH1 +S20_U06.CH2 +S20_U06.CH3 +S20_U06.CH4 +S22_U01.CH1 +S22_U01.CH2 +S22_U01.CH3 +S22_U01.CH4 +S22_U02.CH1 +S22_U02.CH2 +S22_U02.CH3 +S22_U02.CH4 +S22_U04.CH1 +S22_U04.CH2 +S22_U04.CH3 +S22_U04.CH4 +S22_U05.CH1 +S22_U05.CH2 +S22_U05.CH3 +S22_U05.CH4 +S22_U06.CH1 +S22_U06.CH2 +S22_U06.CH3 +S22_U06.CH4 +S23_U01.CH1 +S23_U01.CH2 +S23_U01.CH3 
+S23_U01.CH4 +S23_U02.CH1 +S23_U02.CH2 +S23_U02.CH3 +S23_U02.CH4 +S23_U03.CH1 +S23_U03.CH2 +S23_U03.CH3 +S23_U03.CH4 +S23_U04.CH1 +S23_U04.CH2 +S23_U04.CH3 +S23_U04.CH4 +S23_U05.CH1 +S23_U05.CH2 +S23_U05.CH3 +S23_U05.CH4 +S23_U06.CH1 +S23_U06.CH2 +S23_U06.CH3 +S23_U06.CH4 +S24_U01.CH1 +S24_U01.CH2 +S24_U01.CH3 +S24_U01.CH4 +S24_U02.CH1 +S24_U02.CH2 +S24_U02.CH3 +S24_U02.CH4 +S24_U03.CH1 +S24_U03.CH2 +S24_U03.CH3 +S24_U03.CH4 +S24_U04.CH1 +S24_U04.CH2 +S24_U04.CH3 +S24_U04.CH4 +S24_U05.CH1 +S24_U05.CH2 +S24_U05.CH3 +S24_U05.CH4 +S24_U06.CH1 +S24_U06.CH2 +S24_U06.CH3 +S24_U06.CH4 diff --git a/egs/chime6/s5_track1/local/extract_noises.py b/egs/chime6/s5_track1/local/extract_noises.py new file mode 100755 index 00000000000..8f617752f2d --- /dev/null +++ b/egs/chime6/s5_track1/local/extract_noises.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import argparse +import json +import logging +import os +import sys +import scipy.io.wavfile as siw +import math +import numpy as np + + +def get_args(): + parser = argparse.ArgumentParser( + """Extract noises from the corpus based on the non-speech regions. + e.g. {} /export/corpora4/CHiME5/audio/train/ \\ + /export/corpora4/CHiME5/transcriptions/train/ \\ + /export/b05/zhiqiw/noise/""".format(sys.argv[0])) + + parser.add_argument("--segment-length", default=20) + parser.add_argument("audio_dir", help="""Location of the CHiME5 Audio files. e.g. /export/corpora4/CHiME5/audio/train/""") + parser.add_argument("trans_dir", help="""Location of the CHiME5 Transcriptions. e.g. /export/corpora4/CHiME5/transcriptions/train/""") + parser.add_argument("audio_list", help="""List of ids of the CHiME5 recordings from which noise is extracted. e.g. local/distant_audio_list""") + parser.add_argument("out_dir", help="Output directory to write noise files. e.g. 
/export/b05/zhiqiw/noise/") + + args = parser.parse_args() + return args + + +def Trans_time(time, fs): + units = time.split(':') + time_second = float(units[0]) * 3600 + float(units[1]) * 60 + float(units[2]) + return int(time_second*fs) + + +# remove mic dependency for CHiME-6 +def Get_time(conf, tag, fs): + for i in conf: + st = Trans_time(i['start_time'], fs) + ed = Trans_time(i['end_time'], fs) + tag[st:ed] = 0 + return tag + + +def write_noise(out_dir, seg, audio, sig, tag, fs, cnt): + sig_noise = sig[np.nonzero(tag)] + for i in range(math.floor(len(sig_noise)/(seg*fs))): + siw.write(out_dir +'/noise'+str(cnt)+'.wav', fs, sig_noise[i*seg*fs:(i+1)*seg*fs]) + cnt += 1 + return cnt + + +def main(): + args = get_args() + + if not os.path.exists(args.out_dir): + os.makedirs(args.out_dir) + + wav_list = open(args.audio_list).readlines() + + cnt = 1 + for i, audio in enumerate(wav_list): + parts = audio.strip().split('.') + if len(parts) == 2: + # Assuming distant mic with name like S03_U01.CH1 + session, mic = parts[0].split('_') + channel = parts[1] + base_name = session + "_" + mic + "." 
+ channel + else: + # Assuming close talk mic with name like S03_P09 + session, mic = audio.strip().split('_') + base_name = session + "_" + mic + fs, sig = siw.read(args.audio_dir + "/" + base_name + '.wav') + tag = np.ones(len(sig)) + if i == 0 or session != session_p: + with open(args.trans_dir + "/" + session + '.json') as f: + conf = json.load(f) + tag = Get_time(conf, tag, fs) + cnt = write_noise(args.out_dir, args.segment_length, audio, sig, tag, fs, cnt) + session_p = session + + +if __name__ == '__main__': + main() diff --git a/egs/chime6/s5_track1/local/extract_vad_weights.sh b/egs/chime6/s5_track1/local/extract_vad_weights.sh new file mode 100755 index 00000000000..250b021bd8f --- /dev/null +++ b/egs/chime6/s5_track1/local/extract_vad_weights.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) +# 2019 Vimal Manohar +# Apache 2.0. + +# This script converts lattices available from a first pass decode into a per-frame weights file +# The ctms generated from the lattices are filtered. Silence frames are assigned a low weight (e.g.0.00001) +# and voiced frames have a weight of 1. + +set -e + +stage=1 +cmd=run.pl +silence_weight=0.00001 +#end configuration section. + +. ./cmd.sh + +[ -f ./path.sh ] && . ./path.sh +. utils/parse_options.sh || exit 1; +if [ $# -ne 4 ]; then + echo "Usage: $0 [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + exit 1; +fi + +data_dir=$1 +lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. 
+decode_dir=$3 +output_wts_file_gz=$4 + +if [ $stage -le 1 ]; then + echo "$0: generating CTM from input lattices" + steps/get_ctm_conf.sh --cmd "$cmd" \ + --use-segments false \ + $data_dir \ + $lang \ + $decode_dir +fi + +if [ $stage -le 2 ]; then + name=`basename $data_dir` + # we just take the ctm from LMWT 10, it doesn't seem to affect the results a lot + ctm=$decode_dir/score_10/$name.ctm + echo "$0: generating weights file from ctm $ctm" + + pad_frames=0 # this did not seem to be helpful but leaving it as an option. + feat-to-len scp:$data_dir/feats.scp ark,t:- >$decode_dir/utt.lengths + if [ ! -f $ctm ]; then echo "$0: expected ctm to exist: $ctm"; exit 1; fi + + cat $ctm | awk '$6 == 1.0 && $4 < 1.0' | \ + grep -v -w mm | grep -v -w mhm | grep -v -F '[noise]' | \ + grep -v -F '[laughter]' | grep -v -F '' | \ + perl -e ' $lengths=shift @ARGV; $pad_frames=shift @ARGV; $silence_weight=shift @ARGV; + $pad_frames >= 0 || die "bad pad-frames value $pad_frames"; + open(L, "<$lengths") || die "opening lengths file"; + @all_utts = (); + $utt2ref = { }; + while () { + ($utt, $len) = split(" ", $_); + push @all_utts, $utt; + $array_ref = [ ]; + for ($n = 0; $n < $len; $n++) { ${$array_ref}[$n] = $silence_weight; } + $utt2ref{$utt} = $array_ref; + } + while () { + @A = split(" ", $_); + @A == 6 || die "bad ctm line $_"; + $utt = $A[0]; $beg = $A[2]; $len = $A[3]; + $beg_int = int($beg * 100) - $pad_frames; + $len_int = int($len * 100) + 2*$pad_frames; + $array_ref = $utt2ref{$utt}; + !defined $array_ref && die "No length info for utterance $utt"; + for ($t = $beg_int; $t < $beg_int + $len_int; $t++) { + if ($t >= 0 && $t < @$array_ref) { + ${$array_ref}[$t] = 1; + } + } + } + foreach $utt (@all_utts) { $array_ref = $utt2ref{$utt}; + print $utt, " [ ", join(" ", @$array_ref), " ]\n"; + } ' $decode_dir/utt.lengths $pad_frames $silence_weight | \ + gzip -c > $output_wts_file_gz +fi diff --git a/egs/chime6/s5_track1/local/generate_chime6_data.sh 
b/egs/chime6/s5_track1/local/generate_chime6_data.sh new file mode 100755 index 00000000000..93106cf605a --- /dev/null +++ b/egs/chime6/s5_track1/local/generate_chime6_data.sh @@ -0,0 +1,121 @@ +#!/bin/bash + +# Copyright 2019, Johns Hopkins University (Author: Shinji Watanabe) +# Apache 2.0 +# +# This script generates synchronized audio data across arrays by considering +# the frame dropping, clock drift etc. done by Prof. Jon Barker at University of +# Sheffield. This script first downloads the synchronization tool and generate +# the synchronized audios and corresponding JSON transcription files +# Note that +# 1) the JSON format is slightly changed from the original CHiME-5 one (simplified +# thanks to the synchronization) +# 2) it requires sox v.14.4.2 and Python 3.6.7 +# Unfortunately, the generated files would be different depending on the sox +# and Python versions and to generate the exactly same audio files, this script uses +# the fixed versions of sox and Python installed in the miniconda instead of system ones + +. ./cmd.sh +. ./path.sh + +# Config: +cmd=run.pl + +. utils/parse_options.sh || exit 1; + +if [ $# != 2 ]; then + echo "Wrong #arguments ($#, expected 2)" + echo "Usage: local/generate_chime6_data.sh [options] " + echo "main options (for others, see top of script file)" + echo " --cmd # Command to run in parallel with" + exit 1; +fi + +sdir=$1 +odir=$2 +expdir=${PWD}/exp/chime6_data + +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +# get chime6-synchronisation tools +SYNC_PATH=${PWD}/chime6-synchronisation +if [ ! 
-d ${SYNC_PATH} ]; then + git clone https://github.com/chimechallenge/chime6-synchronisation.git +fi + +mkdir -p ${odir} +mkdir -p ${expdir}/log + +# split the session to avoid too much disk access +sessions1="S01 S02 S03 S04 S05 S06 S07" +sessions2="S08 S09 S12 S13 S16 S17 S18" +sessions3="S19 S20 S21 S22 S23 S24" + +CONDA_PATH=${HOME}/miniconda3/bin +IN_PATH=${sdir}/audio +OUT_PATH=${odir}/audio +TMP_PATH=${odir}/audio_tmp + +if [ ! -d "${IN_PATH}" ]; then + echo "please specify the CHiME-5 data path correctly" + exit 1 +fi +mkdir -p $OUT_PATH/train $OUT_PATH/eval $OUT_PATH/dev +mkdir -p $TMP_PATH/train $TMP_PATH/eval $TMP_PATH/dev + +if [ -f ${odir}/audio/dev/S02_P05.wav ]; then + echo "CHiME-6 date already exists" + exit 0 +fi + +pushd ${SYNC_PATH} +echo "Correct for frame dropping" +for session in ${sessions1}; do + $cmd ${expdir}/correct_signals_for_frame_drops.${session}.log \ + ${CONDA_PATH}/python correct_signals_for_frame_drops.py --session=${session} chime6_audio_edits.json $IN_PATH $TMP_PATH & +done +wait +for session in ${sessions2}; do + $cmd ${expdir}/correct_signals_for_frame_drops.${session}.log \ + ${CONDA_PATH}/python correct_signals_for_frame_drops.py --session=${session} chime6_audio_edits.json $IN_PATH $TMP_PATH & +done +wait +for session in ${sessions3}; do + $cmd ${expdir}/correct_signals_for_frame_drops.${session}.log \ + ${CONDA_PATH}/python correct_signals_for_frame_drops.py --session=${session} chime6_audio_edits.json $IN_PATH $TMP_PATH & +done +wait + +echo "Sox processing for correcting clock drift" +for session in ${sessions1}; do + $cmd ${expdir}/correct_signals_for_clock_drift.${session}.log \ + ${CONDA_PATH}/python correct_signals_for_clock_drift.py --session=${session} --sox_path $CONDA_PATH chime6_audio_edits.json $TMP_PATH $OUT_PATH & +done +wait +for session in ${sessions2}; do + $cmd ${expdir}/correct_signals_for_clock_drift.${session}.log \ + ${CONDA_PATH}/python correct_signals_for_clock_drift.py --session=${session} 
--sox_path $CONDA_PATH chime6_audio_edits.json $TMP_PATH $OUT_PATH & +done +wait +for session in ${sessions3}; do + $cmd ${expdir}/correct_signals_for_clock_drift.${session}.log \ + ${CONDA_PATH}/python correct_signals_for_clock_drift.py --session=${session} --sox_path $CONDA_PATH chime6_audio_edits.json $TMP_PATH $OUT_PATH & +done +wait + +echo "adjust the JSON files" +mkdir -p ${odir}/transcriptions/eval ${odir}/transcriptions/dev ${odir}/transcriptions/train +${CONDA_PATH}/python correct_transcript_for_clock_drift.py --clock_drift_data chime6_audio_edits.json ${sdir}/transcriptions ${odir}/transcriptions +popd + +# finally check md5sum +pushd ${odir} +echo "check MD5 hash value for generated audios" +md5sum -c ${SYNC_PATH}/audio_md5sums.txt || echo "check https://github.com/chimechallenge/chime6-synchronisation" +popd + +echo "`basename $0` Done." diff --git a/egs/chime6/s5_track1/local/get_location.py b/egs/chime6/s5_track1/local/get_location.py new file mode 100755 index 00000000000..92351e72e65 --- /dev/null +++ b/egs/chime6/s5_track1/local/get_location.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# Copyright Ashish Arora +# Apache 2.0 +# This script create a utterance and location mapping file +# It is used in score_for_submit script to get locationwise WER. 
+# for GSS enhancement + +import json +from datetime import timedelta +from glob import glob +import sys, io +from decimal import Decimal + +SAMPLE_RATE = 16000 + +def to_samples(time: str): + "mapping time in string to int, as mapped in pb_chime5" + "see https://github.com/fgnt/pb_chime5/blob/master/pb_chime5/database/chime5/get_speaker_activity.py" + hours, minutes, seconds = [t for t in time.split(':')] + hours = int(hours) + minutes = int(minutes) + seconds = Decimal(seconds) + + seconds_samples = seconds * SAMPLE_RATE + samples = ( + hours * 3600 * SAMPLE_RATE + + minutes * 60 * SAMPLE_RATE + + seconds_samples + ) + return int(samples) + + +def main(): + output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + json_file_location= sys.argv[1] + '/*.json' + json_files = glob(json_file_location) + + json_file_location= sys.argv[1] + '/*.json' + json_files = glob(json_file_location) + location_dict = {} + json_file_location= sys.argv[1] + '/*.json' + json_files = glob(json_file_location) + location_dict = {} + for file in json_files: + with open(file, 'r') as f: + session_dict = json.load(f) + + for uttid in session_dict: + try: + ref=uttid['ref'] + speaker_id = uttid['speaker'] + location = uttid['location'] + location=location.upper() + session_id=uttid['session_id'] + words = uttid['words'] + end_sample=to_samples(str(uttid['end_time'])) + start_sample=to_samples(str(uttid['start_time'])) + start_sample_str = str(int(start_sample * 100 / SAMPLE_RATE)).zfill(7) + end_sample_str = str(int(end_sample * 100 / SAMPLE_RATE)).zfill(7) + utt = "{0}_{1}-{2}-{3}".format(speaker_id, session_id, start_sample_str, end_sample_str) + location_dict[utt]=(location) + except: + continue + + for key in sorted(location_dict.keys()): + utt= "{0} {1}".format(key, location_dict[key]) + output.write(utt+ '\n') + +if __name__ == '__main__': + main() diff --git a/egs/chime6/s5_track1/local/install_pb_chime5.sh b/egs/chime6/s5_track1/local/install_pb_chime5.sh new file mode 
100755 index 00000000000..a151dc60f12 --- /dev/null +++ b/egs/chime6/s5_track1/local/install_pb_chime5.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Installs pb_chime5 +# miniconda should be installed in $HOME/miniconda3/ + +miniconda_dir=$HOME/miniconda3/ + +if [ ! -d $miniconda_dir ]; then + echo "$miniconda_dir does not exist. Please run 'tools/extras/install_miniconda.sh" && exit 1; +fi + +git clone https://github.com/fgnt/pb_chime5.git +cd pb_chime5 +# Download submodule dependencies # https://stackoverflow.com/a/3796947/5766934 +git submodule init +git submodule update + +$miniconda_dir/bin/python -m pip install cython +$miniconda_dir/bin/python -m pip install pymongo +$miniconda_dir/bin/python -m pip install fire +$miniconda_dir/bin/python -m pip install -e pb_bss/ +$miniconda_dir/bin/python -m pip install -e . diff --git a/egs/chime6/s5_track1/local/json2text.py b/egs/chime6/s5_track1/local/json2text.py new file mode 100755 index 00000000000..34cf52f086b --- /dev/null +++ b/egs/chime6/s5_track1/local/json2text.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import json +import argparse +import logging +import sys + + +def hms_to_seconds(hms): + hour = hms.split(':')[0] + minute = hms.split(':')[1] + second = hms.split(':')[2].split('.')[0] + + # .xx (10 ms order) + ms10 = hms.split(':')[2].split('.')[1] + + # total seconds + seconds = int(hour) * 3600 + int(minute) * 60 + int(second) + + return '{:07d}'.format(int(str(seconds) + ms10)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('json', type=str, help='JSON transcription file') + parser.add_argument('--mictype', type=str, + choices=['ref', 'worn', 'gss', 'u01', 'u02', 'u03', 'u04', 'u05', 'u06'], + help='Type of microphones') + args = parser.parse_args() + + # logging info + log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s:%(message)s" 
+ logging.basicConfig(level=logging.INFO, format=log_format) + + logging.debug("reading %s", args.json) + with open(args.json, 'rt', encoding="utf-8") as f: + j = json.load(f) + + for x in j: + if '[redacted]' not in x['words']: + session_id = x['session_id'] + speaker_id = x['speaker'] + if args.mictype == 'ref': + mictype = x['ref'] + elif args.mictype == 'worn' or args.mictype == 'gss': + mictype = 'original' + else: + mictype = args.mictype.upper() # convert from u01 to U01 + + # add location tag for scoring (only for dev and eval sets) + if 'location' in x.keys(): + location = x['location'].upper() + else: + location = 'NOLOCATION' + + # remove mic dependency for CHiME-6 + start_time = x['start_time'] + end_time = x['end_time'] + + # remove meta chars and convert to lower + words = x['words'].replace('"', '')\ + .replace('.', '')\ + .replace('?', '')\ + .replace(',', '')\ + .replace(':', '')\ + .replace(';', '')\ + .replace('!', '').lower() + + # remove multiple spaces + words = " ".join(words.split()) + + # convert to seconds, e.g., 1:10:05.55 -> 3600 + 600 + 5.55 = 4205.55 + start_time = hms_to_seconds(start_time) + end_time = hms_to_seconds(end_time) + + uttid = speaker_id + '_' + session_id + if not args.mictype in ['worn', 'gss']: + uttid += '_' + mictype + + if args.mictype == 'gss': + uttid += '-' + start_time + '-' + end_time + else: + uttid += '_' + location + '-' + start_time + '-' + end_time + + # In several utterances, there are inconsistency in the time stamp + # (the end time is earlier than the start time) + # We just ignored such utterances. 
+ if end_time > start_time: + sys.stdout.buffer.write((uttid + ' ' + words + '\n').encode("utf-8")) diff --git a/egs/chime6/s5_track1/local/make_noise_list.py b/egs/chime6/s5_track1/local/make_noise_list.py new file mode 100755 index 00000000000..5aaf7fa4062 --- /dev/null +++ b/egs/chime6/s5_track1/local/make_noise_list.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 + +import glob +import os +import sys + + +if len(sys.argv) != 2: + print ("Usage: {} ".format(sys.argv[0])) + raise SystemExit(1) + + +for line in glob.glob("{}/*.wav".format(sys.argv[1])): + fname = os.path.basename(line.strip()) + + print ("--noise-id {} --noise-type point-source " + "--bg-fg-type foreground {}".format(fname, line.strip())) diff --git a/egs/chime6/s5_track1/local/nnet3/compare_wer.sh b/egs/chime6/s5_track1/local/nnet3/compare_wer.sh new file mode 100755 index 00000000000..095e85cc338 --- /dev/null +++ b/egs/chime6/s5_track1/local/nnet3/compare_wer.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/chime6/s5_track1/local/nnet3/decode.sh b/egs/chime6/s5_track1/local/nnet3/decode.sh new file mode 100755 index 00000000000..8fa54e0d4a6 --- /dev/null +++ b/egs/chime6/s5_track1/local/nnet3/decode.sh @@ -0,0 +1,164 @@ +#!/bin/bash + +# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) +# 2019 Vimal Manohar +# Apache 2.0. + +# This script does 2-stage decoding where the first stage is used to get +# reliable frames for i-vector extraction. 
+ +set -e + +# general opts +iter= +stage=0 +nj=30 +affix= # affix for decode directory + +# ivector opts +max_count=75 # parameter for extract_ivectors.sh +sub_speaker_frames=6000 +ivector_scale=0.75 +get_weights_from_ctm=true +weights_file= # use weights from this archive (must be compressed using gunzip) +silence_weight=0.00001 # apply this weight to silence frames during i-vector extraction +ivector_dir=exp/nnet3 + +# decode opts +pass2_decode_opts="--min-active 1000" +lattice_beam=8 +extra_left_context=0 # change for (B)LSTM +extra_right_context=0 # change for BLSTM +frames_per_chunk=50 # change for (B)LSTM +acwt=0.1 # important to change this when using chain models +post_decode_acwt=1.0 # important to change this when using chain models +extra_left_context_initial=0 +extra_right_context_final=0 + +graph_affix= + +score_opts="--min-lmwt 6 --max-lmwt 13" + +. ./cmd.sh +[ -f ./path.sh ] && . ./path.sh +. utils/parse_options.sh || exit 1; + +if [ $# -ne 4 ]; then + echo "Usage: $0 [options] " + echo " Options:" + echo " --stage (0|1|2) # start scoring script from part-way through." + echo "e.g.:" + echo "$0 data/dev data/lang exp/tri5a/graph_pp exp/nnet3/tdnn" + exit 1; +fi + +data=$1 # data directory +lang=$2 # data/lang +graph=$3 #exp/tri5a/graph_pp +dir=$4 # exp/nnet3/tdnn + +model_affix=`basename $dir` +ivector_affix=${affix:+_$affix}_chain_${model_affix}${iter:+_iter$iter} +affix=${affix:+_${affix}}${iter:+_iter${iter}} + +if [ $stage -le 1 ]; then + if [ ! 
-s ${data}_hires/feats.scp ]; then + utils/copy_data_dir.sh $data ${data}_hires + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj --cmd "$train_cmd" ${data}_hires + steps/compute_cmvn_stats.sh ${data}_hires + utils/fix_data_dir.sh ${data}_hires + fi +fi + +data_set=$(basename $data) +if [ $stage -le 2 ]; then + echo "Extracting i-vectors, stage 1" + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ + --max-count $max_count \ + ${data}_hires $ivector_dir/extractor \ + $ivector_dir/ivectors_${data_set}${ivector_affix}_stage1; + # float comparisons are hard in bash + if [ `bc <<< "$ivector_scale != 1"` -eq 1 ]; then + ivector_scale_affix=_scale$ivector_scale + else + ivector_scale_affix= + fi + + if [ ! -z "$ivector_scale_affix" ]; then + echo "$0: Scaling iVectors, stage 1" + srcdir=$ivector_dir/ivectors_${data_set}${ivector_affix}_stage1 + outdir=$ivector_dir/ivectors_${data_set}${ivector_affix}${ivector_scale_affix}_stage1 + mkdir -p $outdir + $train_cmd $outdir/log/scale_ivectors.log \ + copy-matrix --scale=$ivector_scale scp:$srcdir/ivector_online.scp ark:- \| \ + copy-feats --compress=true ark:- ark,scp:$outdir/ivector_online.ark,$outdir/ivector_online.scp; + cp $srcdir/ivector_period $outdir/ivector_period + fi +fi + +decode_dir=$dir/decode${graph_affix}_${data_set}${affix} +# generate the lattices +if [ $stage -le 3 ]; then + echo "Generating lattices, stage 1" + steps/nnet3/decode.sh --nj $nj --cmd "$decode_cmd" \ + --acwt $acwt --post-decode-acwt $post_decode_acwt \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir $ivector_dir/ivectors_${data_set}${ivector_affix}${ivector_scale_affix}_stage1 \ + --skip-scoring true ${iter:+--iter $iter} \ + $graph ${data}_hires ${decode_dir}_stage1; +fi + +if 
[ $stage -le 4 ]; then + if $get_weights_from_ctm; then + if [ ! -z $weights_file ]; then + echo "$0: Using provided vad weights file $weights_file" + ivector_extractor_weights=$weights_file + else + echo "$0 : Generating vad weights file" + ivector_extractor_weights=${decode_dir}_stage1/weights${affix}.gz + local/extract_vad_weights.sh --silence-weight $silence_weight \ + --cmd "$decode_cmd" ${iter:+--iter $iter} \ + ${data}_hires $lang \ + ${decode_dir}_stage1 $ivector_extractor_weights + fi + else + # get weights from best path decoding + ivector_extractor_weights=${decode_dir}_stage1 + fi +fi + +if [ $stage -le 5 ]; then + echo "Extracting i-vectors, stage 2 with weights from $ivector_extractor_weights" + # this does offline decoding, except we estimate the iVectors per + # speaker, excluding silence (based on alignments from a DNN decoding), with a + # different script. This is just to demonstrate that script. + # the --sub-speaker-frames is optional; if provided, it will divide each speaker + # up into "sub-speakers" of at least that many frames... can be useful if + # acoustic conditions drift over time within the speaker's data. 
+ steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj $nj \ + --silence-weight $silence_weight \ + --sub-speaker-frames $sub_speaker_frames --max-count $max_count \ + ${data}_hires $lang $ivector_dir/extractor \ + $ivector_extractor_weights $ivector_dir/ivectors_${data_set}${ivector_affix}; +fi + +if [ $stage -le 6 ]; then + echo "Generating lattices, stage 2 with --acwt $acwt" + rm -f ${decode_dir}/.error + steps/nnet3/decode.sh --nj $nj --cmd "$decode_cmd" $pass2_decode_opts \ + --acwt $acwt --post-decode-acwt $post_decode_acwt \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk" \ + --skip-scoring false ${iter:+--iter $iter} --lattice-beam $lattice_beam \ + --online-ivector-dir $ivector_dir/ivectors_${data_set}${ivector_affix} \ + $graph ${data}_hires ${decode_dir} || touch ${decode_dir}/.error + [ -f ${decode_dir}/.error ] && echo "$0: Error decoding" && exit 1; +fi +exit 0 diff --git a/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh b/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..3910e1812a3 --- /dev/null +++ b/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +set -euo pipefail + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. + +stage=0 +train_set=train_worn_u100k +test_sets="dev_worn dev_beamformit_ref" +gmm=tri3 +nj=96 + +nnet3_affix=_train_worn_u100k + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in ${gmm_dir}/final.mdl; do + if [ ! 
-f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj ${nj} --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b1{5,6,8,9}/$USER/kaldi-data/mfcc/chime5-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. 
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l &2 "$0" "$@" +if [ $# -ne 3 ] ; then + echo >&2 "$0" "$@" + echo >&2 "$0: Error: wrong number of arguments" + echo -e >&2 "Usage:\n $0 [opts] " + echo -e >&2 "eg:\n $0 /corpora/chime5/audio/train /corpora/chime5/transcriptions/train data/train" + exit 1 +fi + +set -e -o pipefail + +adir=$(utils/make_absolute.sh $1) +jdir=$2 +dir=$3 + +json_count=$(find -L $jdir -name "*.json" | wc -l) +wav_count=$(find -L $adir -name "*.wav" | wc -l) + +if [ "$json_count" -eq 0 ]; then + echo >&2 "We expect that the directory $jdir will contain json files." + echo >&2 "That implies you have supplied a wrong path to the data." + exit 1 +fi +if [ "$wav_count" -eq 0 ]; then + echo >&2 "We expect that the directory $adir will contain wav files." + echo >&2 "That implies you have supplied a wrong path to the data." + exit 1 +fi + +echo "$0: Converting transcription to text" + +mkdir -p $dir + +for file in $jdir/*json; do + ./local/json2text.py --mictype $mictype $file +done | \ + sed -e "s/\[inaudible[- 0-9]*\]/[inaudible]/g" |\ + sed -e 's/ - / /g' |\ + sed -e 's/mm-/mm/g' > $dir/text.orig + +echo "$0: Creating datadir $dir for type=\"$mictype\"" + +if [ $mictype == "worn" ]; then + # convert the filenames to wav.scp format, use the basename of the file + # as a the wav.scp key, add .L and .R for left and right channel + # i.e. 
each file will have two entries (left and right channel) + find -L $adir -name "S[0-9]*_P[0-9]*.wav" | \ + perl -ne '{ + chomp; + $path = $_; + next unless $path; + @F = split "/", $path; + ($f = $F[@F-1]) =~ s/.wav//; + @F = split "_", $f; + print "${F[1]}_${F[0]}.L sox $path -t wav - remix 1 |\n"; + print "${F[1]}_${F[0]}.R sox $path -t wav - remix 2 |\n"; + }' | sort > $dir/wav.scp + + # generate the transcripts for both left and right channel + # from the original transcript in the form + # P09_S03-0006072-0006147 gimme the baker + # create left and right channel transcript + # P09_S03.L-0006072-0006147 gimme the baker + # P09_S03.R-0006072-0006147 gimme the baker + sed -n 's/ *$//; h; s/-/\.L-/p; g; s/-/\.R-/p' $dir/text.orig | sort > $dir/text +elif [ $mictype == "ref" ]; then + # fixed reference array + + # first get a text, which will be used to extract reference arrays + perl -ne 's/-/.ENH-/;print;' $dir/text.orig | sort > $dir/text + + find -L $adir | grep "\.wav" | sort > $dir/wav.flist + # following command provide the argument for grep to extract only reference arrays + grep `cut -f 1 -d"-" $dir/text | awk -F"_" '{print $2 "_" $3}' | sed -e "s/\.ENH//" | sort | uniq | sed -e "s/^/ -e /" | tr "\n" " "` $dir/wav.flist > $dir/wav.flist2 + paste -d" " \ + <(awk -F "/" '{print $NF}' $dir/wav.flist2 | sed -e "s/\.wav/.ENH/") \ + $dir/wav.flist2 | sort > $dir/wav.scp +elif [ $mictype == "gss" ]; then + find -L $adir -name "P[0-9]*_S[0-9]*.wav" | \ + perl -ne '{ + chomp; + $path = $_; + next unless $path; + @F = split "/", $path; + ($f = $F[@F-1]) =~ s/.wav//; + print "$f $path\n"; + }' | sort > $dir/wav.scp + + cat $dir/text.orig | sort > $dir/text +else + # array mic case + # convert the filenames to wav.scp format, use the basename of the file + # as a the wav.scp key + find -L $adir -name "*.wav" -ipath "*${mictype}*" |\ + perl -ne '$p=$_;chomp $_;@F=split "/";$F[$#F]=~s/\.wav//;print "$F[$#F] $p";' |\ + sort -u > $dir/wav.scp + + # convert the transcripts 
from + # P09_S03-0006072-0006147 gimme the baker + # to the per-channel transcripts + # P09_S03_U01_NOLOCATION.CH1-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH2-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH3-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH4-0006072-0006147 gimme the baker + perl -ne '$l=$_; + for($i=1; $i<=4; $i++) { + ($x=$l)=~ s/-/.CH\Q$i\E-/; + print $x;}' $dir/text.orig | sort > $dir/text + +fi +$cleanup && rm -f $dir/text.* $dir/wav.scp.* $dir/wav.flist + +# Prepare 'segments', 'utt2spk', 'spk2utt' +if [ $mictype == "worn" ]; then + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" \ + > $dir/segments +elif [ $mictype == "ref" ]; then + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" |\ + sed -e "s/ P.._/ /" > $dir/segments +elif [ $mictype != "gss" ]; then + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" |\ + sed -e 's/ P.._/ /' > $dir/segments +fi + +cut -f 1 -d ' ' $dir/text | \ + perl -ne 'chomp;$utt=$_;s/_.*//;print "$utt $_\n";' > $dir/utt2spk + +utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt + +# Check that data dirs are okay! +utils/validate_data_dir.sh --no-feats $dir || exit 1 diff --git a/egs/chime6/s5_track1/local/prepare_dict.sh b/egs/chime6/s5_track1/local/prepare_dict.sh new file mode 100755 index 00000000000..09083d0e795 --- /dev/null +++ b/egs/chime6/s5_track1/local/prepare_dict.sh @@ -0,0 +1,124 @@ +#!/bin/bash +# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +. ./utils/parse_options.sh + +. 
./path.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + + +# The parts of the output of this that will be needed are +# [in data/local/dict/ ] +# lexicon.txt +# extra_questions.txt +# nonsilence_phones.txt +# optional_silence.txt +# silence_phones.txt + + +# check existing directories +[ $# != 0 ] && echo "Usage: $0" && exit 1; + +dir=data/local/dict + +mkdir -p $dir +echo "$0: Getting CMU dictionary" +if [ ! -f $dir/cmudict.done ]; then + [ -d $dir/cmudict ] && rm -rf $dir/cmudict + svn co https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict $dir/cmudict + touch $dir/cmudict.done +fi + +# silence phones, one per line. +for w in sil spn inaudible laughs noise; do + echo $w; +done > $dir/silence_phones.txt +echo sil > $dir/optional_silence.txt + +# For this setup we're discarding stress. +cat $dir/cmudict/cmudict-0.7b.symbols | \ + perl -ne 's:[0-9]::g; s:\r::; print lc($_)' | \ + sort -u > $dir/nonsilence_phones.txt + +# An extra question will be added by including the silence phones in one class. 
+paste -d ' ' -s $dir/silence_phones.txt > $dir/extra_questions.txt + +grep -v ';;;' $dir/cmudict/cmudict-0.7b |\ + uconv -f latin1 -t utf-8 -x Any-Lower |\ + perl -ne 's:(\S+)\(\d+\) :$1 :; s: : :; print;' |\ + perl -ne '@F = split " ",$_,2; $F[1] =~ s/[0-9]//g; print "$F[0] $F[1]";' \ + > $dir/lexicon1_raw_nosil.txt || exit 1; + +# Add prons for laughter, noise, oov +for w in `grep -v sil $dir/silence_phones.txt`; do + echo "[$w] $w" +done | cat - $dir/lexicon1_raw_nosil.txt > $dir/lexicon2_raw.txt || exit 1; + +# we keep all words from the cmudict in the lexicon +# might reduce OOV rate on dev and eval +cat $dir/lexicon2_raw.txt \ + <( echo "mm m" + echo " spn" + echo "cuz k aa z" + echo "cuz k ah z" + echo "cuz k ao z" + echo "mmm m"; \ + echo "hmm hh m"; \ + ) | sort -u | sed 's/[\t ]/\t/' > $dir/iv_lexicon.txt + + +cat data/train*/text | \ + awk '{for (n=2;n<=NF;n++){ count[$n]++; } } END { for(n in count) { print count[n], n; }}' | \ + sort -nr > $dir/word_counts + +cat $dir/word_counts | awk '{print $2}' > $dir/word_list + +awk '{print $1}' $dir/iv_lexicon.txt | \ + perl -e '($word_counts)=@ARGV; + open(W, "<$word_counts")||die "opening word-counts $word_counts"; + while() { chop; $seen{$_}=1; } + while() { + ($c,$w) = split; + if (!defined $seen{$w}) { print; } + } ' $dir/word_counts > $dir/oov_counts.txt + +echo "*Highest-count OOVs (including fragments) are:" +head -n 10 $dir/oov_counts.txt +echo "*Highest-count OOVs (excluding fragments) are:" +grep -v -E '^-|-$' $dir/oov_counts.txt | head -n 10 || true + +echo "*Training a G2P and generating missing pronunciations" +mkdir -p $dir/g2p/ +phonetisaurus-align --input=$dir/iv_lexicon.txt --ofile=$dir/g2p/aligned_lexicon.corpus +ngram-count -order 4 -kn-modify-counts-at-end -ukndiscount\ + -gt1min 0 -gt2min 0 -gt3min 0 -gt4min 0 \ + -text $dir/g2p/aligned_lexicon.corpus -lm $dir/g2p/aligned_lexicon.arpa +phonetisaurus-arpa2wfst --lm=$dir/g2p/aligned_lexicon.arpa --ofile=$dir/g2p/g2p.fst +awk '{print $2}' 
$dir/oov_counts.txt > $dir/oov_words.txt +phonetisaurus-apply --nbest 2 --model $dir/g2p/g2p.fst --thresh 5 --accumulate \ + --word_list $dir/oov_words.txt > $dir/oov_lexicon.txt + +## The next section is again just for debug purposes +## to show words for which the G2P failed +cat $dir/oov_lexicon.txt $dir/iv_lexicon.txt | sort -u > $dir/lexicon.txt +rm -f $dir/lexiconp.txt 2>/dev/null; # can confuse later script if this exists. +awk '{print $1}' $dir/lexicon.txt | \ + perl -e '($word_counts)=@ARGV; + open(W, "<$word_counts")||die "opening word-counts $word_counts"; + while() { chop; $seen{$_}=1; } + while() { + ($c,$w) = split; + if (!defined $seen{$w}) { print; } + } ' $dir/word_counts > $dir/oov_counts.g2p.txt + +echo "*Highest-count OOVs (including fragments) after G2P are:" +head -n 10 $dir/oov_counts.g2p.txt + +utils/validate_dict_dir.pl $dir +exit 0; + diff --git a/egs/chime6/s5_track1/local/replace_uttid.py b/egs/chime6/s5_track1/local/replace_uttid.py new file mode 100755 index 00000000000..96c45b58783 --- /dev/null +++ b/egs/chime6/s5_track1/local/replace_uttid.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# Copyright Ashish Arora +# Apache 2.0 +# This script is used in score_for_submit. It adds locationid to the utteranceid, +# using uttid_location file, for locationwise scoring. 
+ +import sys, io +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + +def load_uttid_location(f): + locations = {} + for line in f: + parts=line.strip().split(' ') + uttid, loc = parts[0], parts[1] + locations[uttid] = loc + return locations + +locations = load_uttid_location(open(sys.argv[1],'r', encoding='utf8')) + +for line in open(sys.argv[2],'r', encoding='utf8'): + uttid, res = line.split(None, 1) + try: + location = locations[uttid] + location_uttid = location +'_'+ str(uttid) + output.write(location_uttid + ' ' + res) + except KeyError as e: + raise Exception("Could not find utteranceid in " + "uttid_location file" + "({0})\n".format(str(e))) diff --git a/egs/chime6/s5_track1/local/reverberate_lat_dir.sh b/egs/chime6/s5_track1/local/reverberate_lat_dir.sh new file mode 100755 index 00000000000..f601a37c0e1 --- /dev/null +++ b/egs/chime6/s5_track1/local/reverberate_lat_dir.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +# Copyright 2018 Vimal Manohar +# Apache 2.0 + +num_data_reps=1 +cmd=run.pl +nj=20 +include_clean=false + +. utils/parse_options.sh +. 
./path.sh + +if [ $# -ne 4 ]; then + echo "Usage: $0 " + exit 1 +fi + +train_data_dir=$1 +noisy_latdir=$2 +clean_latdir=$3 +dir=$4 + +clean_nj=$(cat $clean_latdir/num_jobs) + +$cmd JOB=1:$clean_nj $dir/copy_clean_lattices.JOB.log \ + lattice-copy "ark:gunzip -c $clean_latdir/lat.JOB.gz |" \ + ark,scp:$dir/lats_clean.JOB.ark,$dir/lats_clean.JOB.scp || exit 1 + +for n in $(seq $clean_nj); do + cat $dir/lats_clean.$n.scp +done > $dir/lats_clean.scp + +for i in $(seq $num_data_reps); do + cat $dir/lats_clean.scp | awk -vi=$i '{print "rev"i"_"$0}' +done > $dir/lats_rvb.scp + +noisy_nj=$(cat $noisy_latdir/num_jobs) +$cmd JOB=1:$noisy_nj $dir/copy_noisy_lattices.JOB.log \ + lattice-copy "ark:gunzip -c $noisy_latdir/lat.JOB.gz |" \ + ark,scp:$dir/lats_noisy.JOB.ark,$dir/lats_noisy.JOB.scp || exit 1 + +optional_clean= +if $include_clean; then + optional_clean=$dir/lats_clean.scp +fi + +for n in $(seq $noisy_nj); do + cat $dir/lats_noisy.$n.scp +done | cat - $dir/lats_rvb.scp ${optional_clean} | sort -k1,1 > $dir/lats.scp + +utils/split_data.sh $train_data_dir $nj +$cmd JOB=1:$nj $dir/copy_lattices.JOB.log \ + lattice-copy "scp:utils/filter_scp.pl $train_data_dir/split$nj/JOB/utt2spk $dir/lats.scp |" \ + "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1 + +echo $nj > $dir/num_jobs + +if [ -f $clean_latdir/ali.1.gz ]; then + $cmd JOB=1:$clean_nj $dir/copy_clean_alignments.JOB.log \ + copy-int-vector "ark:gunzip -c $clean_latdir/ali.JOB.gz |" \ + ark,scp:$dir/ali_clean.JOB.ark,$dir/ali_clean.JOB.scp + + for n in $(seq $clean_nj); do + cat $dir/ali_clean.$n.scp + done > $dir/ali_clean.scp + + for i in $(seq $num_data_reps); do + cat $dir/ali_clean.scp | awk -vi=$i '{print "rev"i"_"$0}' + done > $dir/ali_rvb.scp + + optional_clean= + if $include_clean; then + optional_clean=$dir/ali_clean.scp + fi + + $cmd JOB=1:$noisy_nj $dir/copy_noisy_alignments.JOB.log \ + copy-int-vector "ark:gunzip -c $noisy_latdir/ali.JOB.gz |" \ + ark,scp:$dir/ali_noisy.JOB.ark,$dir/ali_noisy.JOB.scp + + for n 
in $(seq $noisy_nj); do + cat $dir/ali_noisy.$n.scp + done | cat - $dir/ali_rvb.scp $optional_clean | sort -k1,1 > $dir/ali.scp + + utils/split_data.sh $train_data_dir $nj || exit 1 + $cmd JOB=1:$nj $dir/copy_rvb_alignments.JOB.log \ + copy-int-vector "scp:utils/filter_scp.pl $train_data_dir/split$nj/JOB/utt2spk $dir/ali.scp |" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1 +fi + +cp $clean_latdir/{final.*,tree,*.mat,*opts,*.txt} $dir || true + +rm $dir/lats_{clean,noisy}.*.{ark,scp} $dir/ali_{clean,noisy}.*.{ark,scp} || true # save space diff --git a/egs/chime6/s5_track1/local/run_beamformit.sh b/egs/chime6/s5_track1/local/run_beamformit.sh new file mode 100755 index 00000000000..aa3badd90d8 --- /dev/null +++ b/egs/chime6/s5_track1/local/run_beamformit.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe) + +. ./cmd.sh +. ./path.sh + +# Config: +cmd=run.pl +bmf="1 2 3 4" + +. utils/parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: local/run_beamformit.sh [options] " + echo "main options (for others, see top of script file)" + echo " --cmd # Command to run in parallel with" + echo " --bmf \"1 2 3 4\" # microphones used for beamforming" + exit 1; +fi + +sdir=$1 +odir=$2 +array=$3 +expdir=exp/enhan/`echo $odir | awk -F '/' '{print $NF}'`_`echo $bmf | tr ' ' '_'` + +if ! command -v BeamformIt &>/dev/null ; then + echo "Missing BeamformIt, run 'cd $KALDI_ROOT/tools/; ./extras/install_beamformit.sh; cd -;'" && exit 1 +fi + +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 
'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +mkdir -p $odir +mkdir -p $expdir/log + +echo "Will use the following channels: $bmf" +# number of channels +numch=`echo $bmf | tr ' ' '\n' | wc -l` +echo "the number of channels: $numch" + +# wavfiles.list can be used as the name of the output files +output_wavfiles=$expdir/wavfiles.list +find -L ${sdir} | grep -i ${array} | awk -F "/" '{print $NF}' | sed -e "s/\.CH.\.wav//" | sort | uniq > $expdir/wavfiles.list + +# this is an input file list of the microphones +# format: 1st_wav 2nd_wav ... nth_wav +input_arrays=$expdir/channels_$numch +for x in `cat $output_wavfiles`; do + echo -n "$x" + for ch in $bmf; do + echo -n " $x.CH$ch.wav" + done + echo "" +done > $input_arrays + +# split the list for parallel processing +# number of jobs are set by the number of WAV files +nj=`wc -l $expdir/wavfiles.list | awk '{print $1}'` +split_wavfiles="" +for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" +done +utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + +echo -e "Beamforming\n" +# making a shell script for each job +for n in `seq $nj`; do +cat << EOF > $expdir/log/beamform.$n.sh +while read line; do + $BEAMFORMIT/BeamformIt -s \$line -c $input_arrays \ + --config_file `pwd`/conf/beamformit.cfg \ + --source_dir $sdir \ + --result_dir $odir +done < $output_wavfiles.$n +EOF +done + +chmod a+x $expdir/log/beamform.*.sh +$cmd JOB=1:$nj $expdir/log/beamform.JOB.log \ + $expdir/log/beamform.JOB.sh + +echo "`basename $0` Done." diff --git a/egs/chime6/s5_track1/local/run_gss.sh b/egs/chime6/s5_track1/local/run_gss.sh new file mode 100755 index 00000000000..fbdc4af25d1 --- /dev/null +++ b/egs/chime6/s5_track1/local/run_gss.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe) + +. ./cmd.sh +if [ -f ./path.sh ]; then . 
./path.sh; fi + +# Config: +cmd=run.pl +nj=4 +multiarray=outer_array_mics +bss_iterations=5 +context_samples=160000 +. utils/parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: local/run_gss.sh [options] " + echo "main options (for others, see top of script file)" + echo " --cmd # Command to run in parallel with" + echo " --bss_iterations 5 # Number of EM iterations" + echo " --context_samples 160000 # Left-right context in number of samples" + echo " --multiarray # Multiarray configuration" + exit 1; +fi + +# setting multiarray as "true" uses all mics, we didn't see any performance +# gain from this we have chosen settings that makes the enhacement finish +# in around 1/3 of a day without significant change in performance. +# our result during the experiments are as follows: + +#MAF: multi array = False +#MAT: multi array = True +#Enhancement Iterations Num Microphones Context Computational time for GSS #cpus dev WER eval WER +#GSS(MAF) 10 24 17 hrs 30 62.3 57.98 +#GSS(MAT) 5 24 10s 26 hrs 50 53.15 53.77 +#GSS(MAT) 5 12 10s 9.5 hrs 50 53.09 53.75 + +session_id=$1 +log_dir=$2 +enhanced_dir=$3 +if [ ! -d pb_chime5/ ]; then + echo "Missing pb_chime5, run 'local/install_pb_chime5'" + exit 1 +fi + +miniconda_dir=$HOME/miniconda3/ +if [ ! -d $miniconda_dir/ ]; then + echo "$miniconda_dir/ does not exist. 
Please run '../../../tools/extras/install_miniconda.sh'" + exit 1 +fi + +enhanced_dir=$(utils/make_absolute.sh $enhanced_dir) || \ + { echo "Could not make absolute '$enhanced_dir'" && exit 1; } + +$cmd JOB=1:$nj $log_dir/log/enhance_${session_id}.JOB.log \ + cd pb_chime5/ '&&' \ + $miniconda_dir/bin/python -m pb_chime5.scripts.kaldi_run with \ + chime6=True \ + storage_dir=$enhanced_dir \ + session_id=$session_id \ + job_id=JOB number_of_jobs=$nj \ + bss_iterations=$bss_iterations \ + context_samples=$context_samples \ + multiarray=$multiarray || exit 1 diff --git a/egs/chime6/s5_track1/local/run_wpe.py b/egs/chime6/s5_track1/local/run_wpe.py new file mode 100755 index 00000000000..fbb264f2fd2 --- /dev/null +++ b/egs/chime6/s5_track1/local/run_wpe.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# Apache 2.0 +# Works with both python2 and python3 +# This script assumes that WPE (nara_wpe) is installed locally using miniconda. +# ../../../tools/extras/install_miniconda.sh and ../../../tools/extras/install_wpe.sh +# needs to be run and this script needs to be launched run with that version of +# python. +# See local/run_wpe.sh for example. 
+ +import numpy as np +import soundfile as sf +import time +import os, errno +from tqdm import tqdm +import argparse + +# to avoid huge memory consumption we decided to use `wpe_v8` instead of the original wpe by +# following the advice from Christoph Boeddeker at Paderborn University +# https://github.com/chimechallenge/kaldi_chime6/commit/2ea6ac07ef66ad98602f073b24a233cb7f61605c#r36147334 +from nara_wpe.wpe import wpe_v8 as wpe +from nara_wpe.utils import stft, istft +from nara_wpe import project_root + +parser = argparse.ArgumentParser() +parser.add_argument('--files', '-f', nargs='+') +args = parser.parse_args() + +input_files = args.files[:len(args.files)//2] +output_files = args.files[len(args.files)//2:] +out_dir = os.path.dirname(output_files[0]) +try: + os.makedirs(out_dir) +except OSError as e: + if e.errno != errno.EEXIST: + raise + +stft_options = dict( + size=512, + shift=128, + window_length=None, + fading=True, + pad=True, + symmetric_window=False +) + +sampling_rate = 16000 +delay = 3 +iterations = 5 +taps = 10 + +signal_list = [ + sf.read(f)[0] + for f in input_files +] +y = np.stack(signal_list, axis=0) +Y = stft(y, **stft_options).transpose(2, 0, 1) +Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0) +z = istft(Z, size=stft_options['size'], shift=stft_options['shift']) + +for d in range(len(signal_list)): + sf.write(output_files[d], z[d,:], sampling_rate) diff --git a/egs/chime6/s5_track1/local/run_wpe.sh b/egs/chime6/s5_track1/local/run_wpe.sh new file mode 100755 index 00000000000..ed512e69aae --- /dev/null +++ b/egs/chime6/s5_track1/local/run_wpe.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# Apache 2.0 + +. ./cmd.sh +. ./path.sh + +# Config: +nj=4 +cmd=run.pl + +. 
utils/parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: local/run_wpe.sh [options] " + echo "main options (for others, see top of script file)" + echo " --cmd # Command to run in parallel with" + echo " --nj 50 # number of jobs for parallel processing" + exit 1; +fi + +sdir=$1 +odir=$2 +array=$3 +task=`basename $sdir` +expdir=exp/wpe/${task}_${array} +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +miniconda_dir=$HOME/miniconda3/ +if [ ! -d $miniconda_dir ]; then + echo "$miniconda_dir does not exist. Please run '$KALDI_ROOT/tools/extras/install_miniconda.sh'." + exit 1 +fi + +# check if WPE is installed +result=`$miniconda_dir/bin/python -c "\ +try: + import nara_wpe + print('1') +except ImportError: + print('0')"` + +if [ "$result" == "1" ]; then + echo "WPE is installed" +else + echo "WPE is not installed. 
Please run ../../../tools/extras/install_wpe.sh" + exit 1 +fi + +mkdir -p $odir +mkdir -p $expdir/log + +# wavfiles.list can be used as the name of the output files +output_wavfiles=$expdir/wavfiles.list +find -L ${sdir} | grep -i ${array} > $expdir/channels_input +cat $expdir/channels_input | awk -F '/' '{print $NF}' | sed "s@S@$odir\/S@g" > $expdir/channels_output +paste -d" " $expdir/channels_input $expdir/channels_output > $output_wavfiles + +# split the list for parallel processing +split_wavfiles="" +for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" +done +utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + +echo -e "Dereverberation - $task - $array\n" +# making a shell script for each job +for n in `seq $nj`; do +cat <<-EOF > $expdir/log/wpe.$n.sh +while read line; do + $miniconda_dir/bin/python local/run_wpe.py \ + --file \$line +done < $output_wavfiles.$n +EOF +done + +chmod a+x $expdir/log/wpe.*.sh +$cmd JOB=1:$nj $expdir/log/wpe.JOB.log \ + $expdir/log/wpe.JOB.sh + +echo "`basename $0` Done." diff --git a/egs/chime6/s5_track1/local/score.sh b/egs/chime6/s5_track1/local/score.sh new file mode 120000 index 00000000000..6a200b42ed3 --- /dev/null +++ b/egs/chime6/s5_track1/local/score.sh @@ -0,0 +1 @@ +../steps/scoring/score_kaldi_wer.sh \ No newline at end of file diff --git a/egs/chime6/s5_track1/local/score_for_submit.sh b/egs/chime6/s5_track1/local/score_for_submit.sh new file mode 100755 index 00000000000..ba7d6cde574 --- /dev/null +++ b/egs/chime6/s5_track1/local/score_for_submit.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal) +# Copyright 2019 Johns Hopkins University (Author: Shinji Watanabe) +# Apache 2.0 +# +# This script provides official CHiME-6 challenge track 1 submission scores per room and session. 
+# It first calculates the best search parameter configurations by using the dev set +# and also create the transcriptions for dev and eval sets to be submitted. +# The default setup does not calculate scores of the evaluation set since +# the evaluation transcription is not distributed (July 9 2018) + +cmd=run.pl +dev=exp/chain_train_worn_u100k_cleaned/tdnn1a_sp/decode_dev_beamformit_ref +eval=exp/chain_train_worn_u100k_cleaned/tdnn1a_sp/decode_eval_beamformit_ref +do_eval=true +enhancement=gss +json= + +echo "$0 $@" # Print the command line for logging +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 0 ]; then + echo "Usage: $0 [--cmd (run.pl|queue.pl...)]" + echo "This script provides official CHiME-6 challenge submission scores" + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --dev # dev set decoding directory" + echo " --eval # eval set decoding directory" + echo " --enhancement # enhancement type (gss or beamformit)" + echo " --json # directory containing CHiME-6 json files" + exit 1; +fi + +# get language model weight and word insertion penalty from the dev set +best_lmwt=`cat $dev/scoring_kaldi/wer_details/lmwt` +best_wip=`cat $dev/scoring_kaldi/wer_details/wip` + +echo "best LM weight: $best_lmwt" +echo "insertion penalty weight: $best_wip" + +echo "==== development set ====" +# development set +# get uttid location mapping +local/add_location_to_uttid.sh --enhancement $enhancement $json/dev \ + $dev/scoring_kaldi/wer_details/ $dev/scoring_kaldi/wer_details/uttid_location +# get the scoring result per utterance +score_result=$dev/scoring_kaldi/wer_details/per_utt_loc + +for session in S02 S09; do + for room in DINING KITCHEN LIVING; do + # get nerror + nerr=`grep "\#csid" $score_result | grep $room | grep $session | awk '{sum+=$4+$5+$6} END {print sum}'` + # get nwords from references (NF-2 means to exclude utterance id and " ref ") + nwrd=`grep "\#csid" $score_result | 
grep $room | grep $session | awk '{sum+=$3+$4+$6} END {print sum}'` + # compute wer with scale=2 + wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` + + # report the results + echo -n "session $session " + echo -n "room $room: " + echo -n "#words $nwrd, " + echo -n "#errors $nerr, " + echo "wer $wer %" + done +done +echo -n "overall: " +# get nerror +nerr=`grep "\#csid" $score_result | awk '{sum+=$4+$5+$6} END {print sum}'` +# get nwords from references (NF-2 means to exclude utterance id and " ref ") +nwrd=`grep "\#csid" $score_result | awk '{sum+=$3+$4+$6} END {print sum}'` +# compute wer with scale=2 +wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` +echo -n "#words $nwrd, " +echo -n "#errors $nerr, " +echo "wer $wer %" + +echo "==== evaluation set ====" +# evaluation set +# get the scoring result per utterance. Copied from local/score.sh +mkdir -p $eval/scoring_kaldi/wer_details_devbest +$cmd $eval/scoring_kaldi/log/stats1.log \ + cat $eval/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ + align-text --special-symbol="'***'" ark:$eval/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ + utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \> $eval/scoring_kaldi/wer_details_devbest/per_utt + +local/add_location_to_uttid.sh --enhancement $enhancement $json/eval \ + $eval/scoring_kaldi/wer_details_devbest/ $eval/scoring_kaldi/wer_details_devbest/uttid_location + +score_result=$eval/scoring_kaldi/wer_details_devbest/per_utt_loc +for session in S01 S21; do + for room in DINING KITCHEN LIVING; do + if $do_eval; then + # get nerror + nerr=`grep "\#csid" $score_result | grep $room | grep $session | awk '{sum+=$4+$5+$6} END {print sum}'` + # get nwords from references (NF-2 means to exclude utterance id and " ref ") + nwrd=`grep "\#csid" $score_result | grep $room | grep $session | awk '{sum+=$3+$4+$6} END {print sum}'` + # compute wer with scale=2 + wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` + + # report the results + echo -n "session $session " + echo -n "room 
$room: " + echo -n "#words $nwrd, " + echo -n "#errors $nerr, " + echo "wer $wer %" + fi + done +done +if $do_eval; then + # get nerror + nerr=`grep "\#csid" $score_result | awk '{sum+=$4+$5+$6} END {print sum}'` + # get nwords from references (NF-2 means to exclude utterance id and " ref ") + nwrd=`grep "\#csid" $score_result | awk '{sum+=$3+$4+$6} END {print sum}'` + # compute wer with scale=2 + wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` + echo -n "overall: " + echo -n "#words $nwrd, " + echo -n "#errors $nerr, " + echo "wer $wer %" +else + echo "skip evaluation scoring" + echo "" + echo "==== when you submit your result to the CHiME-6 challenge track 1 ====" + echo "Please rename your recognition results of " + echo "$dev/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt" + echo "$eval/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt" + echo "with {dev,eval}__.txt, e.g., dev_watanabe_jhu.txt and eval_watanabe_jhu.txt, " + echo "and submit both of them as your final challenge result" + echo "==================================================================" +fi + diff --git a/egs/chime6/s5_track1/local/train_lms_srilm.sh b/egs/chime6/s5_track1/local/train_lms_srilm.sh new file mode 100755 index 00000000000..5a1d56d24b3 --- /dev/null +++ b/egs/chime6/s5_track1/local/train_lms_srilm.sh @@ -0,0 +1,261 @@ +#!/bin/bash +# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe) +# Apache 2.0 + +export LC_ALL=C + +# Begin configuration section. +words_file= +train_text= +dev_text= +oov_symbol="" +# End configuration section + +echo "$0 $@" + +[ -f path.sh ] && . ./path.sh +. ./utils/parse_options.sh || exit 1 + +echo "-------------------------------------" +echo "Building an SRILM language model " +echo "-------------------------------------" + +if [ $# -ne 2 ] ; then + echo "Incorrect number of parameters. 
" + echo "Script has to be called like this:" + echo " $0 [switches] " + echo "For example: " + echo " $0 data data/srilm" + echo "The allowed switches are: " + echo " words_file= word list file -- data/lang/words.txt by default" + echo " train_text= data/train/text is used in case when not specified" + echo " dev_text= last 10 % of the train text is used by default" + echo " oov_symbol=> symbol to use for oov modeling -- by default" + exit 1 +fi + +datadir=$1 +tgtdir=$2 + +##End of configuration +loc=`which ngram-count`; +if [ -z $loc ]; then + echo >&2 "You appear to not have SRILM tools installed, either on your path," + echo >&2 "Use the script \$KALDI_ROOT/tools/install_srilm.sh to install it." + exit 1 +fi + +# Prepare the destination directory +mkdir -p $tgtdir + +for f in $words_file $train_text $dev_text; do + [ ! -s $f ] && echo "No such file $f" && exit 1; +done + +[ -z $words_file ] && words_file=$datadir/lang/words.txt +if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=`cat $train_text | wc -l` + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using words file: $words_file" + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +elif [ ! -z "$train_text" ] && [ ! -z "$dev_text" ] ; then + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + train_text=$train_text + dev_text=$dev_text +else + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + +fi + +[ ! -f $words_file ] && echo >&2 "File $words_file must exist!" && exit 1 +[ ! -f $train_text ] && echo >&2 "File $train_text must exist!" && exit 1 +[ ! 
-f $dev_text ] && echo >&2 "File $dev_text must exist!" && exit 1 + + +# Extract the word list from the training dictionary; exclude special symbols +sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab +if (($?)); then + echo "Failed to create vocab from $words_file" + exit 1 +else + # wc vocab # doesn't work due to some encoding issues + echo vocab contains `cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +# We also have to avoid skewing the LM by incorporating the same sentences +# from different channels +sed -e "s/\.CH.//" -e "s/_.\-./_/" -e "s/NOLOCATION\(\.[LR]\)*-//" -e "s/U[0-9][0-9]_//" $train_text | sort -u | \ + perl -ane 'print join(" ", @F[1..$#F]) . "\n" if @F > 1' > $tgtdir/train.txt +if (($?)); then + echo "Failed to create $tgtdir/train.txt from $train_text" + exit 1 +else + echo "Removed first word (uid) from every line of $train_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +sed -e "s/\.CH.//" -e "s/_.\-./_/" $dev_text | sort -u | \ + perl -ane 'print join(" ", @F[1..$#F]) . 
"\n" if @F > 1' > $tgtdir/dev.txt +if (($?)); then + echo "Failed to create $tgtdir/dev.txt from $dev_text" + exit 1 +else + echo "Removed first word (uid) from every line of $dev_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + + +echo "-------------------" +echo "Good-Turing 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk 
"$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn111.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn112.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn122.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn123.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + + +echo "-------------------" +echo "Good-Turing 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.gt0111.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz \ + -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz \ + -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 \ + 
-text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz \ + -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz \ + -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.kn0111.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0223.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 
-gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +if [ ! -z ${LIBLBFGS} ]; then + #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault + #instead of that, we simply output the model in the maxent format and convert it using the "ngram" + echo "-------------------" + echo "Maxent 3grams" + echo "-------------------" + sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ + ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ + ngram -lm - -order 3 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ + sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1 + + echo "-------------------" + echo "Maxent 4grams" + echo "-------------------" + sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ + ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ + ngram -lm - -order 4 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ + sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1 +else + echo >&2 "SRILM is not compiled with the support of MaxEnt models." 
+ echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" + echo >&2 "which will take care of compiling the SRILM with MaxEnt support" + exit 1; +fi + + +echo "--------------------" +echo "Computing perplexity" +echo "--------------------" +( + for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done + for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done +) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt + +echo "The perplexity scores report is stored in $tgtdir/perplexities.txt " +echo "" + +for best_ngram in {3,4}gram ; do + outlm=best_${best_ngram}.gz + lmfilename=$(grep "${best_ngram}" $tgtdir/perplexities.txt | head -n 1 | cut -f 1 -d ' ') + echo "$outlm -> $lmfilename" + (cd $tgtdir; rm -f $outlm; ln -sf $(basename $lmfilename) $outlm ) +done diff --git a/egs/chime6/s5_track1/local/wer_output_filter b/egs/chime6/s5_track1/local/wer_output_filter new file mode 100755 index 00000000000..6f4b6400716 --- /dev/null +++ b/egs/chime6/s5_track1/local/wer_output_filter @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal ) +# Apache 2.0 + + +## Filter for scoring of the STT results. Convert everything to lowercase +## and add some ad-hoc fixes for the hesitations + +perl -e ' + while() { + @A = split(" ", $_); + $id = shift @A; print "$id "; + foreach $a (@A) { + print lc($a) . " " unless $a =~ /\[.*\]/; + } + print "\n"; + }' | \ +sed -e ' + s/\/hmm/g; + s/\/hmm/g; + s/\/hmm/g; +' + +#| uconv -f utf-8 -t utf-8 -x Latin-ASCII + diff --git a/egs/chime6/s5_track1/path.sh b/egs/chime6/s5_track1/path.sh new file mode 100644 index 00000000000..fb1c0489386 --- /dev/null +++ b/egs/chime6/s5_track1/path.sh @@ -0,0 +1,7 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . 
$KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C + diff --git a/egs/chime6/s5_track1/run.sh b/egs/chime6/s5_track1/run.sh new file mode 100755 index 00000000000..0890a939faf --- /dev/null +++ b/egs/chime6/s5_track1/run.sh @@ -0,0 +1,280 @@ +#!/bin/bash +# +# Based mostly on the TED-LIUM and Switchboard recipe +# +# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal) +# Apache 2.0 +# + +# Begin configuration section. +nj=96 +decode_nj=20 +stage=0 +nnet_stage=-10 +decode_stage=1 +decode_only=false +num_data_reps=4 +foreground_snrs="20:10:15:5:0" +background_snrs="20:10:15:5:0" +enhancement=beamformit # gss or beamformit + +# End configuration section +. ./utils/parse_options.sh + +. ./cmd.sh +. ./path.sh + +if [ $decode_only == "true" ]; then + stage=16 +fi + +set -e # exit on error + +# chime5 main directory path +# please change the path accordingly +chime5_corpus=/export/corpora4/CHiME5 +# chime6 data directories, which are generated from ${chime5_corpus}, +# to synchronize audio files across arrays and modify the annotation (JSON) file accordingly +chime6_corpus=${PWD}/CHiME6 +json_dir=${chime6_corpus}/transcriptions +audio_dir=${chime6_corpus}/audio + +if [[ ${enhancement} == *gss* ]]; then + enhanced_dir=${enhanced_dir}_multiarray + enhancement=${enhancement}_multiarray +fi + +if [[ ${enhancement} == *beamformit* ]]; then + enhanced_dir=${enhanced_dir} + enhancement=${enhancement} +fi + +test_sets="dev_${enhancement} eval_${enhancement}" +train_set=train_worn_simu_u400k + +# This script also needs the phonetisaurus g2p, srilm, beamformit +./local/check_tools.sh || exit 1 + +########################################################################### +# We first generate the 
synchronized audio files across arrays and +# corresponding JSON files. Note that this requires sox v14.4.2, +# which is installed via miniconda in ./local/check_tools.sh +########################################################################### + +if [ $stage -le 0 ]; then + local/generate_chime6_data.sh \ + --cmd "$train_cmd" \ + ${chime5_corpus} \ + ${chime6_corpus} +fi + +########################################################################### +# We prepare dict and lang in stages 1 to 3. +########################################################################### + +if [ $stage -le 1 ]; then + echo "$0: prepare data..." + # skip u03 and u04 as they are missing + for mictype in worn u01 u02 u05 u06; do + local/prepare_data.sh --mictype ${mictype} \ + ${audio_dir}/train ${json_dir}/train data/train_${mictype} + done + for dataset in dev; do + for mictype in worn; do + local/prepare_data.sh --mictype ${mictype} \ + ${audio_dir}/${dataset} ${json_dir}/${dataset} \ + data/${dataset}_${mictype} + done + done +fi + +if [ $stage -le 2 ]; then + echo "$0: train lm ..." + local/prepare_dict.sh + + utils/prepare_lang.sh \ + data/local/dict "" data/local/lang data/lang + + local/train_lms_srilm.sh \ + --train-text data/train_worn/text --dev-text data/dev_worn/text \ + --oov-symbol "" --words-file data/lang/words.txt \ + data/ data/srilm +fi + +LM=data/srilm/best_3gram.gz +if [ $stage -le 3 ]; then + # Compiles G for chime5 trigram LM + echo "$0: prepare lang..." + utils/format_lm.sh \ + data/lang $LM data/local/dict/lexicon.txt data/lang + +fi + +######################################################################################### +# In stages 4 to 7, we augment and fix train data for our training purpose. point source +# noises are extracted from chime corpus. Here we use 400k utterances from array microphones, +# its augmentation and all the worn set utterances in train. 
+######################################################################################### + +if [ $stage -le 4 ]; then + # remove possibly bad sessions (P11_S03, P52_S19, P53_S24, P54_S24) + # see http://spandh.dcs.shef.ac.uk/chime_challenge/data.html for more details + utils/copy_data_dir.sh data/train_worn data/train_worn_org # back up + grep -v -e "^P11_S03" -e "^P52_S19" -e "^P53_S24" -e "^P54_S24" data/train_worn_org/text > data/train_worn/text + utils/fix_data_dir.sh data/train_worn +fi + +if [ $stage -le 5 ]; then + local/extract_noises.py $chime6_corpus/audio/train $chime6_corpus/transcriptions/train \ + local/distant_audio_list distant_noises + local/make_noise_list.py distant_noises > distant_noise_list + + noise_list=distant_noise_list + + if [ ! -d RIRS_NOISES/ ]; then + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + + # This is the config for the system using simulated RIRs and point-source noises + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") + rvb_opts+=(--noise-set-parameters $noise_list) + + steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix "rev" \ + --foreground-snrs $foreground_snrs \ + --background-snrs $background_snrs \ + --speech-rvb-probability 1 \ + --pointsource-noise-addition-probability 1 \ + --isotropic-noise-addition-probability 1 \ + --num-replications $num_data_reps \ + --max-noises-per-minute 1 \ + --source-sampling-rate 16000 \ + data/train_worn data/train_worn_rvb +fi + +if [ $stage -le 6 ]; then + # combine mix array and worn mics + # randomly extract first 400k utterances from all mics + # if you want to include more training data, you can increase the number of array mic utterances + utils/combine_data.sh 
data/train_uall data/train_u01 data/train_u02 data/train_u05 data/train_u06 + utils/subset_data_dir.sh data/train_uall 400000 data/train_u400k + utils/combine_data.sh data/${train_set} data/train_worn data/train_worn_rvb data/train_u400k + + # only use left channel for worn mic recognition + # you can use both left and right channels for training + for dset in train dev; do + utils/copy_data_dir.sh data/${dset}_worn data/${dset}_worn_stereo + grep "\.L-" data/${dset}_worn_stereo/text > data/${dset}_worn/text + utils/fix_data_dir.sh data/${dset}_worn + done +fi + +if [ $stage -le 7 ]; then + # Split speakers up into 3-minute chunks. This doesn't hurt adaptation, and + # lets us use more jobs for decoding etc. + for dset in ${train_set}; do + utils/copy_data_dir.sh data/${dset} data/${dset}_nosplit + utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_nosplit data/${dset} + done +fi + +################################################################################## +# Now make 13-dim MFCC features. We use 13-dim fetures for GMM-HMM systems. +################################################################################## + +if [ $stage -le 8 ]; then + # Now make MFCC features. + # mfccdir should be some place with a largish disk where you + # want to store MFCC features. + echo "$0: make features..." + mfccdir=mfcc + for x in ${train_set}; do + steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" \ + data/$x exp/make_mfcc/$x $mfccdir + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir + utils/fix_data_dir.sh data/$x + done +fi + +################################################################################### +# Stages 9 to 13 train monophone and triphone models. 
They will be used for +# generating lattices for training the chain model +################################################################################### + +if [ $stage -le 9 ]; then + # make a subset for monophone training + utils/subset_data_dir.sh --shortest data/${train_set} 100000 data/${train_set}_100kshort + utils/subset_data_dir.sh data/${train_set}_100kshort 30000 data/${train_set}_30kshort +fi + +if [ $stage -le 10 ]; then + # Starting basic training on MFCC features + steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set}_30kshort data/lang exp/mono +fi + +if [ $stage -le 11 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/mono exp/mono_ali + + steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/${train_set} data/lang exp/mono_ali exp/tri1 +fi + +if [ $stage -le 12 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/tri1 exp/tri1_ali + + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + 4000 50000 data/${train_set} data/lang exp/tri1_ali exp/tri2 +fi + +if [ $stage -le 13 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/tri2 exp/tri2_ali + + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 data/${train_set} data/lang exp/tri2_ali exp/tri3 +fi + +####################################################################### +# Perform data cleanup for training data. 
+####################################################################### + +if [ $stage -le 14 ]; then + # The following script cleans the data and produces cleaned data + steps/cleanup/clean_and_segment_data.sh --nj ${nj} --cmd "$train_cmd" \ + --segmentation-opts "--min-segment-length 0.3 --min-new-segment-length 0.6" \ + data/${train_set} data/lang exp/tri3 exp/tri3_cleaned data/${train_set}_cleaned +fi + +########################################################################## +# CHAIN MODEL TRAINING +# skipping decoding here and performing it in step 16 +########################################################################## + +if [ $stage -le 15 ]; then + # chain TDNN + local/chain/run_tdnn.sh --nj ${nj} \ + --stage $nnet_stage \ + --train-set ${train_set}_cleaned \ + --test-sets "$test_sets" \ + --gmm tri3_cleaned --nnet3-affix _${train_set}_cleaned_rvb +fi + +########################################################################## +# DECODING is done in the local/decode.sh script. 
This script performs +# enhancement, fixes test sets performs feature extraction and 2 stage decoding +########################################################################## + +if [ $stage -le 16 ]; then + local/decode.sh --stage $decode_stage \ + --enhancement $enhancement \ + --train_set "$train_set" +fi + +exit 0; diff --git a/egs/chime6/s5_track1/steps b/egs/chime6/s5_track1/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/chime6/s5_track1/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/chime6/s5_track1/utils b/egs/chime6/s5_track1/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/chime6/s5_track1/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/chime6/s5_track2/RESULTS b/egs/chime6/s5_track2/RESULTS new file mode 100644 index 00000000000..eacee196584 --- /dev/null +++ b/egs/chime6/s5_track2/RESULTS @@ -0,0 +1,19 @@ +# Results for Chime-6 track 2 for dev and eval, using pretrained models +# available at http://kaldi-asr.org/models/m12. + +# Speech Activity Detection (SAD) + Missed speech False alarm Total error +Dev 4.3 2.1 6.4 +Eval 5.6 5.9 11.5 + +# The results for the remaining pipeline are only for array U06. + +# Diarization + DER JER +Dev 31.37 20.45 +Eval 30.67 18.97 + +# ASR nnet3 tdnn+chain +Dev: U06 58881 48061 81.62 +Eval: U06 55132 47184 85.58 + diff --git a/egs/chime6/s5_track2/cmd.sh b/egs/chime6/s5_track2/cmd.sh new file mode 100644 index 00000000000..86514d94d4d --- /dev/null +++ b/egs/chime6/s5_track2/cmd.sh @@ -0,0 +1,14 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. 
Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="retry.pl queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" diff --git a/egs/chime6/s5_track2/conf/beamformit.cfg b/egs/chime6/s5_track2/conf/beamformit.cfg new file mode 100755 index 00000000000..70fdd858651 --- /dev/null +++ b/egs/chime6/s5_track2/conf/beamformit.cfg @@ -0,0 +1,50 @@ +#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/) + +# scrolling size to compute the delays +scroll_size = 250 + +# cross correlation computation window size +window_size = 500 + +#amount of maximum points for the xcorrelation taken into account +nbest_amount = 4 + +#flag wether to apply an automatic noise thresholding +do_noise_threshold = 1 + +#Percentage of frames with lower xcorr taken as noisy +noise_percent = 10 + +######## acoustic modelling parameters + +#transition probabilities weight for multichannel decoding +trans_weight_multi = 25 +trans_weight_nbest = 25 + +### + +#flag wether to print the feaures after setting them, or not +print_features = 1 + +#flag wether to use the bad frames in the sum process +do_avoid_bad_frames = 1 + +#flag to use the best channel (SNR) as a reference +#defined from command line +do_compute_reference = 1 + +#flag wether to use a uem file or not(process all the file) +do_use_uem_file = 0 + +#flag wether to use an adaptative weights scheme or fixed weights +do_adapt_weights = 1 + +#flag wether to output the sph files or just run the system to create the auxiliary files +do_write_sph_files = 1 + +####directories where to store/retrieve info#### 
+#channels_file = ./cfg-files/channels + +#show needs to be passed as argument normally, here a default one is given just in case +#show_id = Ttmp + diff --git a/egs/chime6/s5_track2/conf/mfcc.conf b/egs/chime6/s5_track2/conf/mfcc.conf new file mode 100644 index 00000000000..32988403b00 --- /dev/null +++ b/egs/chime6/s5_track2/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false +--sample-frequency=16000 diff --git a/egs/chime6/s5_track2/conf/mfcc_hires.conf b/egs/chime6/s5_track2/conf/mfcc_hires.conf new file mode 100644 index 00000000000..fd64b62eb16 --- /dev/null +++ b/egs/chime6/s5_track2/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=16000 +--num-mel-bins=40 +--num-ceps=40 +--low-freq=40 +--high-freq=-400 diff --git a/egs/chime6/s5_track2/conf/online_cmvn.conf b/egs/chime6/s5_track2/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/chime6/s5_track2/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/chime6/s5_track2/conf/sad.conf b/egs/chime6/s5_track2/conf/sad.conf new file mode 100644 index 00000000000..752bb1cf6c5 --- /dev/null +++ b/egs/chime6/s5_track2/conf/sad.conf @@ -0,0 +1,2 @@ +affix=_1a +nnet_type=stats diff --git a/egs/chime6/s5_track2/diarization b/egs/chime6/s5_track2/diarization new file mode 120000 index 00000000000..bad937c1444 --- /dev/null +++ b/egs/chime6/s5_track2/diarization @@ -0,0 +1 @@ +../../callhome_diarization/v1/diarization \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/chain b/egs/chime6/s5_track2/local/chain new file mode 120000 index 
#! /usr/bin/env python
# Copyright 2019 Vimal Manohar
# Apache 2.0.

"""This script converts an RTTM with
speaker info into kaldi utt2spk and segments"""

import argparse


def get_args():
    """Parse and validate command-line arguments.

    The two boolean-ish options arrive as the strings "true"/"false" and are
    normalized to real booleans before returning.
    """
    parser = argparse.ArgumentParser(
        description="""This script converts an RTTM with
        speaker info into kaldi utt2spk and segments""")
    parser.add_argument("--use-reco-id-as-spkr", type=str,
                        choices=["true", "false"], default="false",
                        help="Use the recording ID based on RTTM and "
                        "reco2file_and_channel as the speaker")
    parser.add_argument("--append-reco-id-to-spkr", type=str,
                        choices=["true", "false"], default="false",
                        help="Append recording ID to the speaker ID")
    parser.add_argument("rttm_file", type=str,
                        help="Input RTTM file; only SPEAKER lines are used, "
                        "all other line types are ignored")
    parser.add_argument("reco2file_and_channel", type=str,
                        help="Input reco2file_and_channel file, one "
                        "'<reco-id> <file-id> <channel>' triple per line")
    parser.add_argument("utt2spk", type=str,
                        help="Output utt2spk file")
    parser.add_argument("segments", type=str,
                        help="Output segments file")

    args = parser.parse_args()

    args.use_reco_id_as_spkr = bool(args.use_reco_id_as_spkr == "true")
    args.append_reco_id_to_spkr = bool(args.append_reco_id_to_spkr == "true")

    if args.use_reco_id_as_spkr:
        if args.append_reco_id_to_spkr:
            raise Exception("Appending recording ID to speaker does not make sense when using --use-reco-id-as-spkr=true")

    return args


def main():
    args = get_args()

    # Map (file-id, channel) -> recording-id using reco2file_and_channel.
    file_and_channel2reco = {}
    with open(args.reco2file_and_channel) as f:
        for line in f:
            parts = line.strip().split()
            file_and_channel2reco[(parts[1], parts[2])] = parts[0]

    utt2spk = {}
    segments = {}
    with open(args.rttm_file) as f:
        for line in f:
            parts = line.strip().split()
            if parts[0] != "SPEAKER":
                continue

            file_id = parts[1]
            channel = parts[2]

            try:
                reco = file_and_channel2reco[(file_id, channel)]
            except KeyError as e:
                raise Exception("Could not find recording with "
                                "(file_id, channel) "
                                "= ({0},{1}) in {2}: {3}\n".format(
                                    file_id, channel,
                                    args.reco2file_and_channel, str(e)))

            # RTTM stores start time and duration; convert to start/end.
            start_time = float(parts[3])
            end_time = start_time + float(parts[4])

            if args.use_reco_id_as_spkr:
                spkr = reco
            elif args.append_reco_id_to_spkr:
                spkr = reco + "-" + parts[7]
            else:
                spkr = parts[7]

            # Encode times in centiseconds (zero-padded) so that utterance
            # ids sort chronologically as plain strings.
            st = int(start_time * 100)
            end = int(end_time * 100)
            utt = "{0}-{1:06d}-{2:06d}".format(spkr, st, end)
            utt2spk[utt] = spkr
            segments[utt] = (reco, start_time, end_time)

    # Fix: the original never closed these writers (leaked file handles and
    # relied on interpreter exit to flush); 'with' closes deterministically.
    with open(args.utt2spk, 'w') as utt2spk_writer, \
         open(args.segments, 'w') as segments_writer:
        for uttid_id in sorted(utt2spk):
            utt2spk_writer.write("{0} {1}\n".format(uttid_id, utt2spk[uttid_id]))
            segments_writer.write("{0} {1} {2:7.2f} {3:7.2f}\n".format(
                uttid_id, segments[uttid_id][0],
                segments[uttid_id][1], segments[uttid_id][2]))


if __name__ == '__main__':
    main()
#!/bin/bash
#
# This script decodes raw utterances through the entire pipeline:
# Feature extraction -> SAD -> Diarization -> ASR
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal)
# 2019 Desh Raj, David Snyder, Ashish Arora
# Apache 2.0

# Begin configuration section.
nj=8
decode_nj=10
stage=0
sad_stage=0
diarizer_stage=0
decode_diarize_stage=0
score_stage=0
enhancement=beamformit

# chime5 main directory path
# please change the path accordingly
chime5_corpus=/export/corpora4/CHiME5
# chime6 data directories, which are generated from ${chime5_corpus},
# to synchronize audio files across arrays and modify the annotation (JSON) file accordingly
chime6_corpus=${PWD}/CHiME6
json_dir=${chime6_corpus}/transcriptions
audio_dir=${chime6_corpus}/audio

enhanced_dir=enhanced
enhanced_dir=$(utils/make_absolute.sh $enhanced_dir) || exit 1

# training data
train_set=train_worn_simu_u400k
test_sets="dev_${enhancement}_dereverb eval_${enhancement}_dereverb"

. ./utils/parse_options.sh

. ./cmd.sh
. ./path.sh
# sad.conf provides $affix and $nnet_type used for the SAD model paths below.
. ./conf/sad.conf

# This script also needs the phonetisaurus g2p, srilm, beamformit
./local/check_tools.sh || exit 1

###########################################################################
# We first generate the synchronized audio files across arrays and
# corresponding JSON files. Note that this requires sox v14.4.2,
# which is installed via miniconda in ./local/check_tools.sh
###########################################################################

if [ $stage -le 0 ]; then
  local/generate_chime6_data.sh \
    --cmd "$train_cmd" \
    ${chime5_corpus} \
    ${chime6_corpus}
fi

#######################################################################
# Prepare the dev and eval data with dereverberation (WPE) and
# beamforming.
#######################################################################
if [ $stage -le 1 ]; then
  # Beamforming using reference arrays
  # enhanced WAV directory
  enhandir=enhan
  dereverb_dir=${PWD}/wav/wpe/

  # NOTE(review): array u05 is intentionally absent from this list —
  # confirm against the challenge data description before changing.
  for dset in dev eval; do
    for mictype in u01 u02 u03 u04 u06; do
      local/run_wpe.sh --nj 4 --cmd "$train_cmd --mem 20G" \
        ${audio_dir}/${dset} \
        ${dereverb_dir}/${dset} \
        ${mictype}
    done
  done

  for dset in dev eval; do
    for mictype in u01 u02 u03 u04 u06; do
      local/run_beamformit.sh --cmd "$train_cmd" \
        ${dereverb_dir}/${dset} \
        ${enhandir}/${dset}_${enhancement}_${mictype} \
        ${mictype}
    done
  done

  # Note that for the evaluation sets, we use the flag
  # "--train false". This keeps the files segments, text,
  # and utt2spk with .bak extensions, so that they can
  # be used later for scoring if needed but are not used
  # in the intermediate stages.
  for dset in dev eval; do
    local/prepare_data.sh --mictype ref --train false \
      "$PWD/${enhandir}/${dset}_${enhancement}_u0*" \
      ${json_dir}/${dset} data/${dset}_${enhancement}_dereverb
  done
fi

if [ $stage -le 2 ]; then
  # mfccdir should be some place with a largish disk where you
  # want to store MFCC features.
  mfccdir=mfcc
  for x in ${test_sets}; do
    steps/make_mfcc.sh --nj $decode_nj --cmd "$train_cmd" \
      --mfcc-config conf/mfcc_hires.conf \
      data/$x exp/make_mfcc/$x $mfccdir
  done
fi

#######################################################################
# Perform SAD on the dev/eval data
#######################################################################
# $affix and $nnet_type come from conf/sad.conf sourced above.
dir=exp/segmentation${affix}
sad_work_dir=exp/sad${affix}_${nnet_type}/
sad_nnet_dir=$dir/tdnn_${nnet_type}_sad_1a

if [ $stage -le 3 ]; then
  for datadir in ${test_sets}; do
    test_set=data/${datadir}
    if [ ! -f ${test_set}/wav.scp ]; then
      echo "$0: Not performing SAD on ${test_set}"
      exit 0
    fi
    # Perform segmentation
    local/segmentation/detect_speech_activity.sh --nj $decode_nj --stage $sad_stage \
      $test_set $sad_nnet_dir mfcc $sad_work_dir \
      data/${datadir} || exit 1

    mv data/${datadir}_seg data/${datadir}_${nnet_type}_seg
    # Generate RTTM file from segmentation performed by SAD. This can
    # be used to evaluate the performance of the SAD as an intermediate
    # step.
    steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
      data/${datadir}_${nnet_type}_seg/utt2spk data/${datadir}_${nnet_type}_seg/segments \
      data/${datadir}_${nnet_type}_seg/rttm
  done
fi

#######################################################################
# Perform diarization on the dev/eval data
#######################################################################
if [ $stage -le 4 ]; then
  for datadir in ${test_sets}; do
    local/diarize.sh --nj 10 --cmd "$train_cmd" --stage $diarizer_stage \
      exp/xvector_nnet_1a \
      data/${datadir}_${nnet_type}_seg \
      exp/${datadir}_${nnet_type}_seg_diarization
  done
fi

#######################################################################
# Decode diarized output using trained chain model
#######################################################################
if [ $stage -le 5 ]; then
  for datadir in ${test_sets}; do
    local/decode_diarized.sh --nj $nj --cmd "$decode_cmd" --stage $decode_diarize_stage \
      exp/${datadir}_${nnet_type}_seg_diarization data/$datadir data/lang_chain \
      exp/chain_${train_set}_cleaned_rvb exp/nnet3_${train_set}_cleaned_rvb \
      data/${datadir}_diarized
  done
fi

#######################################################################
# Score decoded dev/eval sets
#######################################################################
if [ $stage -le 6 ]; then
  for datadir in ${test_sets}; do
    local/multispeaker_score.sh --cmd "$train_cmd" --stage $score_stage \
      --datadir $datadir data/${datadir}_diarized_hires/text \
      exp/chain_${train_set}_cleaned_rvb/tdnn1b_sp/decode_${datadir}_diarized_2stage/scoring_kaldi/penalty_1.0/10.txt \
      exp/chain_${train_set}_cleaned_rvb/tdnn1b_sp/decode_${datadir}_diarized_2stage/scoring_kaldi_multispeaker
  done
fi
exit 0;
#!/bin/bash
# Copyright 2019 Ashish Arora, Vimal Manohar
# Apache 2.0.
# This script takes an rttm file, and performs decoding on a test directory.
# The output directory contains a text file which can be used for scoring.


stage=0
nj=8
cmd=queue.pl
echo "$0 $@"  # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 6 ]; then
  echo "Usage: $0 <rttm-dir> <in-data-dir> <lang-dir> <asr-model-dir> <ivector-extractor-dir> <out-data-dir>"
  echo "e.g.: $0 data/rttm data/dev data/lang_chain exp/chain_train_worn_simu_u400k_cleaned_rvb \
  exp/nnet3_train_worn_simu_u400k_cleaned_rvb data/dev_diarized"
  echo "Options: "
  echo " --nj <nj>                  # number of parallel jobs."
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  exit 1;
fi

rttm_dir=$1
data_in=$2
lang_dir=$3
asr_model_dir=$4
ivector_extractor=$5
out_dir=$6

# Fail early if any required input file is missing.
for f in $rttm_dir/rttm $data_in/wav.scp $data_in/text.bak \
    $lang_dir/L.fst $asr_model_dir/tree_sp/graph/HCLG.fst \
    $asr_model_dir/tdnn1b_sp/final.mdl; do
  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

if [ $stage -le 0 ]; then
  echo "$0 copying data files in output directory"
  cp $rttm_dir/rttm $rttm_dir/rttm_1
  # Strip the ".ENH" suffix from recording ids.
  # Fix: escape the dot — the original pattern '.ENH' treated '.' as a
  # regex metacharacter and would also delete e.g. "XENH".
  sed -i 's/\.ENH//g' $rttm_dir/rttm_1
  mkdir -p ${out_dir}_hires
  cp ${data_in}/{wav.scp,utt2spk} ${out_dir}_hires
  utils/data/get_reco2dur.sh ${out_dir}_hires
fi

if [ $stage -le 1 ]; then
  echo "$0 creating segments file from rttm and utt2spk, reco2file_and_channel "
  local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm_1 \
    <(awk '{print $2".ENH "$2" "$3}' $rttm_dir/rttm_1 |sort -u) \
    ${out_dir}_hires/utt2spk ${out_dir}_hires/segments

  utils/utt2spk_to_spk2utt.pl ${out_dir}_hires/utt2spk > ${out_dir}_hires/spk2utt

  awk '{print $1" "$1" 1"}' ${out_dir}_hires/wav.scp > ${out_dir}_hires/reco2file_and_channel
  utils/fix_data_dir.sh ${out_dir}_hires || exit 1;
fi

if [ $stage -le 2 ]; then
  # Fix: typo "freatures" -> "features"; also honor the --cmd option
  # instead of hard-coding queue.pl (the script already defines $cmd).
  echo "$0 extracting mfcc features using segments file"
  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj --cmd "$cmd" ${out_dir}_hires
  steps/compute_cmvn_stats.sh ${out_dir}_hires
  cp $data_in/text.bak ${out_dir}_hires/text
fi

if [ $stage -le 3 ]; then
  echo "$0 performing decoding on the extracted features"
  local/nnet3/decode.sh --affix 2stage --acwt 1.0 --post-decode-acwt 10.0 \
    --frames-per-chunk 150 --nj $nj --ivector-dir $ivector_extractor \
    $out_dir $lang_dir $asr_model_dir/tree_sp/graph $asr_model_dir/tdnn1b_sp/
fi
stage=0
nj=10
cmd="run.pl"
ref_rttm=

echo "$0 $@"  # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
  echo "Usage: $0 <model-dir> <in-data-dir> <out-dir>"
  echo "e.g.: $0 exp/xvector_nnet_1a data/dev exp/dev_diarization"
  echo "Options: "
  echo " --nj <nj>                  # number of parallel jobs."
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --ref-rttm <ref-rttm>      # if present, used to score output RTTM."
  exit 1;
fi

model_dir=$1
data_in=$2
out_dir=$3

name=`basename $data_in`

# Fail early if the x-vector model or input features are missing.
for f in $data_in/feats.scp $data_in/segments $model_dir/plda \
    $model_dir/final.raw $model_dir/extract.config; do
  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

if [ $stage -le 0 ]; then
  echo "$0: keeping only data corresponding to array U06 "
  echo "$0: we can skip this stage, to perform diarization on all arrays "
  cp -r data/$name data/${name}.bak
  mv data/$name/wav.scp data/$name/wav.scp.bak
  grep 'U06' data/$name/wav.scp.bak > data/$name/wav.scp
  utils/fix_data_dir.sh data/$name
  nj=2  # since we have reduced number of "speakers" now
fi

if [ $stage -le 1 ]; then
  echo "$0: computing features for x-vector extractor"
  utils/fix_data_dir.sh data/${name}
  rm -rf data/${name}_cmn
  local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$cmd" \
    data/$name data/${name}_cmn exp/${name}_cmn
  cp data/$name/segments exp/${name}_cmn/
  utils/fix_data_dir.sh data/${name}_cmn
fi

if [ $stage -le 2 ]; then
  echo "$0: extracting x-vectors for all segments"
  diarization/nnet3/xvector/extract_xvectors.sh --cmd "$cmd" \
    --nj $nj --window 1.5 --period 0.75 --apply-cmn false \
    --min-segment 0.5 $model_dir \
    data/${name}_cmn $out_dir/xvectors_${name}
fi

# Perform PLDA scoring
if [ $stage -le 3 ]; then
  # Perform PLDA scoring on all pairs of segments for each recording.
  echo "$0: performing PLDA scoring between all pairs of x-vectors"
  diarization/nnet3/xvector/score_plda.sh --cmd "$cmd" \
    --target-energy 0.5 \
    --nj $nj $model_dir/ $out_dir/xvectors_${name} \
    $out_dir/xvectors_${name}/plda_scores
fi

if [ $stage -le 4 ]; then
  echo "$0: performing clustering using PLDA scores (we assume 4 speakers per recording)"
  awk '{print $1, "4"}' data/$name/wav.scp > data/$name/reco2num_spk
  diarization/cluster.sh --cmd "$cmd" --nj $nj \
    --reco2num-spk data/$name/reco2num_spk \
    --rttm-channel 1 \
    $out_dir/xvectors_${name}/plda_scores $out_dir
  echo "$0: wrote RTTM to output directory ${out_dir}"
fi

if [ $stage -le 5 ]; then
  # Fix: the original used 'if [ -f $ref_rttm ]' — with the default-empty
  # ref_rttm that is single-argument test ('[ -f ]'), which is always true,
  # so md-eval ran with an empty reference. Quote and require non-empty.
  if [ -n "$ref_rttm" ] && [ -f "$ref_rttm" ]; then
    echo "$0: computing diarization error rate (DER) using reference ${ref_rttm}"
    # Fix: also create the log directory used by the redirection below.
    mkdir -p $out_dir/tuning/ $out_dir/log/
    md-eval.pl -c 0.25 -1 -r $ref_rttm -s $out_dir/rttm 2> $out_dir/log/der.log > $out_dir/der
    der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' ${out_dir}/der)
    echo "DER: $der%"
  fi
fi
#!/usr/bin/env python3
# Copyright 2019 Yusuke Fujita
# Apache 2.0.

"""This script generates hypothesis utterances aligned with reference segments.
 Usage: gen_align_hyp.py alignment.txt wc.txt > hyp.txt
 alignment.txt is a session-level word alignment generated by align-text command.
 wc.txt is a sequence of utt-id:reference_word_count generated by 'local/get_ref_perspeaker_persession_file.py'.
"""

import sys, io
# NOTE(review): 'string' appears unused in this file; kept as-is.
import string
# Write UTF-8 to stdout regardless of the environment's locale settings.
output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

def load_align_text(f):
    # Parse align-text output: "<reco-id> ref1 hyp1 ; ref2 hyp2 ; ..."
    # and return {reco-id: [(ref_word, hyp_word), ...]} in file order.
    alignments = {}
    for line in f:
        recoid, res = line.split(None, 1)
        alignments[recoid] = []
        toks = res.split(';')
        for tok in toks:
            # assumes each ';'-separated token contains exactly two fields
            # (ref and hyp, possibly epsilon markers) — TODO confirm how
            # align-text encodes pure insertions/deletions here
            ref, hyp = tok.split()
            alignments[recoid].append((ref, hyp))
    return alignments

alignments = load_align_text(open(sys.argv[1],'r', encoding='utf8'))

# Walk each session's alignment in lock-step with the per-utterance reference
# word counts (wc.txt), emitting one hypothesis line per reference utt-id.
for line in open(sys.argv[2],'r', encoding='utf8'):
    recoid, res = line.split(None, 1)
    # Fresh iterator over this session's alignment pairs; consumed across
    # all utterances of the session below.
    ali = iter(alignments[recoid])
    toks = res.split()
    for tok in toks:
        uttid, count = tok.split(':')
        count = int(count)
        text = ''
        for i in range(count):
            # Consume alignment pairs until one reference word is matched,
            # collecting any hypothesis words (insertions) seen on the way.
            # NOTE(review): raises StopIteration if counts exceed the
            # alignment length — presumably inputs are generated consistently.
            while True:
                ref, hyp = ali.__next__()
                if hyp != '':
                    text += ' ' + hyp
                if ref != '':
                    break
        output.write(uttid + ' ' + text.strip() + '\n')
#! /usr/bin/env python3
# Copyright 2019 Ashish Arora
# Apache 2.0.
"""This script finds best matching of reference and hypothesis speakers.
   For the best matching speakers,it provides the WER for the reference session
   (eg:S02) and hypothesis recording (eg: S02_U02)"""

import argparse
from munkres import Munkres


def get_args():
    """Parse command-line arguments: WER file directory and recording id."""
    parser = argparse.ArgumentParser(
        description="""This script finds best matching of reference and hypothesis speakers.
        For the best matching it provides the WER""")
    parser.add_argument("WER_dir", type=str,
                        help="path of WER files")
    parser.add_argument("recording_id", type=str,
                        help="recording_id name")
    args = parser.parse_args()
    return args


def get_results(filename):
    """Parse the first (summary) line of a Kaldi wer_* file.

    The line is comma-separated; returns the counts
    (total_words, insertions, deletions, substitutions) as strings.
    """
    with open(filename) as f:
        first_line = f.readline()
    parts = first_line.strip().split(',')
    total_words = parts[0].split()[-1]
    ins = parts[1].split()[0]
    deletions = parts[2].split()[0]
    sub = parts[3].split()[0]
    return total_words, ins, deletions, sub


def get_min_wer(recording_id, num_speakers, WER_dir):
    """Find the ref->hyp speaker assignment that minimizes total errors.

    Reads WER_dir/wer_<recording_id>_r<i>h<j> for every (reference i,
    hypothesis j) speaker pair, solves the assignment problem with the
    Hungarian algorithm (munkres), then writes and prints the best total
    error counts and the chosen hypothesis speaker order.
    """
    best_wer_file = WER_dir + '/' + 'best_wer' + '_' + recording_id
    best_wer_writer = open(best_wer_file, 'w')
    m = Munkres()
    # total_error_mat[i][j]: total errors scoring ref speaker i+1 against
    # hyp speaker j+1; all_errors_mat keeps the full per-pair breakdown.
    total_error_mat = [0] * num_speakers
    all_errors_mat = [0] * num_speakers
    for i in range(num_speakers):
        total_error_mat[i] = [0] * num_speakers
        all_errors_mat[i] = [0] * num_speakers
    for i in range(1, num_speakers+1):
        for j in range(1, num_speakers+1):
            filename = '/wer_' + recording_id + '_' + 'r' + str(i) + 'h' + str(j)
            filename = WER_dir + filename
            total_words, ins, deletions, sub = get_results(filename)
            ins = int(ins)
            dele = int(deletions)
            sub = int(sub)
            total_error = ins + dele + sub
            total_error_mat[i-1][j-1] = total_error
            all_errors_mat[i-1][j-1] = (total_words, total_error, ins, dele, sub)

    indexes = m.compute(total_error_mat)

    total_errors = total_words = total_ins = total_del = total_sub = 0
    spk_order = '('
    for row, column in indexes:
        words, errs, ins, dele, sub = all_errors_mat[row][column]
        total_errors += int(errs)
        total_words += int(words)
        total_ins += int(ins)
        # Bug fix: the original did 'total_del += int(deletions)', summing a
        # stale variable left over from the file-reading loop above (the last
        # file's deletion count, added once per matched pair) instead of this
        # pair's own deletion count 'dele'.
        total_del += int(dele)
        total_sub += int(sub)
        spk_order = spk_order + str(column+1) + ', '
    spk_order = spk_order + ')'
    text = "Best error: (#T #E #I #D #S) " + str(total_words) + ', ' + str(total_errors) + ', ' + str(total_ins) + ', ' + str(total_del) + ', ' + str(total_sub)
    best_wer_writer.write(" recording_id: " + recording_id + ' ')
    best_wer_writer.write(' best hypothesis speaker order: ' + spk_order + ' ')
    best_wer_writer.write(text + '\n')
    print("recording_id: " + recording_id + ' ')
    print('best hypothesis speaker order: ' + spk_order + ' ')
    print(text)
    best_wer_writer.close()


def main():
    args = get_args()
    # CHiME-6 sessions have 4 participants each.
    num_speakers = 4
    get_min_wer(args.recording_id, num_speakers, args.WER_dir)


if __name__ == '__main__':
    main()
+"""This script splits a kaldi (text) file + into per_array per_session per_speaker hypothesis (text) files""" + +import argparse +def get_args(): + parser = argparse.ArgumentParser( + description="""This script splits a kaldi text file + into per_array per_session per_speaker text files""") + parser.add_argument("input_text_path", type=str, + help="path of text files") + parser.add_argument("output_dir_path", type=str, + help="Output path for per_array per_session per_speaker reference files") + args = parser.parse_args() + return args + + +def main(): + # S09_U06.ENH-4-704588-704738 + args = get_args() + sessionid_micid_speakerid_dict= {} + for line in open(args.input_text_path): + parts = line.strip().split() + uttid_id = parts[0] + temp = uttid_id.strip().split('.')[0] + micid = temp.strip().split('_')[1] + speakerid = uttid_id.strip().split('-')[1] + sessionid = uttid_id.strip().split('_')[0] + sessionid_micid_speakerid = sessionid + '_' + micid + '_' + speakerid + if sessionid_micid_speakerid not in sessionid_micid_speakerid_dict: + sessionid_micid_speakerid_dict[sessionid_micid_speakerid]=list() + sessionid_micid_speakerid_dict[sessionid_micid_speakerid].append(line) + + for sessionid_micid_speakerid in sorted(sessionid_micid_speakerid_dict): + hyp_file = args.output_dir_path + '/' + 'hyp' + '_' + sessionid_micid_speakerid + hyp_writer = open(hyp_file, 'w') + combined_hyp_file = args.output_dir_path + '/' + 'hyp' + '_' + sessionid_micid_speakerid + '_comb' + combined_hyp_writer = open(combined_hyp_file, 'w') + utterances = sessionid_micid_speakerid_dict[sessionid_micid_speakerid] + text = '' + for line in utterances: + parts = line.strip().split() + text = text + ' ' + ' '.join(parts[1:]) + hyp_writer.write(line) + combined_utterance = 'utt' + " " + text + combined_hyp_writer.write(combined_utterance) + combined_hyp_writer.write('\n') + combined_hyp_writer.close() + hyp_writer.close() + + +if __name__ == '__main__': + main() + diff --git 
#! /usr/bin/env python
# Copyright 2019 Ashish Arora
# Apache 2.0.
"""Split a kaldi reference (text) file into per-session, per-speaker
reference files.  For every (session, speaker) pair it writes:
  ref_<session>_<n>        the original utterance lines
  ref_wc_<session>_<n>     a "utt <uttid>:<word-count> ..." bookkeeping line
  ref_<session>_<n>_comb   one pseudo-utterance with all words concatenated
where <n> is a 1-based speaker index assigned per session."""

import argparse


def get_args():
    """Parse and return the command-line arguments."""
    parser = argparse.ArgumentParser(
        description="""This script splits a kaldi text file
                    into per_speaker per_session text files""")
    parser.add_argument("input_text_path", type=str,
                        help="path of text file")
    parser.add_argument("output_dir_path", type=str,
                        help="Output path for per_session per_speaker reference files")
    return parser.parse_args()


def main():
    args = get_args()
    # Utterance ids carry the speaker first, then the session (e.g. P05_S02...).
    by_session_speaker = {}  # "<session>_<speaker>" -> list of original lines
    for raw_line in open(args.input_text_path):
        utt_id = raw_line.strip().split()[0]
        speaker = utt_id.split('_')[0]
        session = utt_id.split('_')[1]
        by_session_speaker.setdefault(session + '_' + speaker, []).append(raw_line)

    # Assign each speaker a 1-based index.  The counter restarts on every new
    # session, but a speaker keeps the first index it was ever assigned.
    speaker_index = {}
    next_index = 1
    last_session = ''
    for group_key in sorted(by_session_speaker):
        session = group_key.split('_')[0]
        speaker = group_key.split('_')[1]
        if session != last_session:
            last_session = session
            next_index = 1
        if speaker not in speaker_index:
            speaker_index[speaker] = next_index
            next_index += 1

    for group_key in sorted(by_session_speaker):
        session = group_key.split('_')[0]
        index = str(speaker_index[group_key.split('_')[1]])
        base = args.output_dir_path + '/ref_' + session + '_' + index
        ref_out = open(base, 'w')
        wc_out = open(args.output_dir_path + '/ref_wc_' + session + '_' + index, 'w')
        combined_out = open(base + '_comb', 'w')
        all_words = ''
        wc_line = 'utt'
        for raw_line in by_session_speaker[group_key]:
            fields = raw_line.strip().split()
            utt_words = ' '.join(fields[1:])
            all_words = all_words + ' ' + utt_words
            ref_out.write(raw_line)
            # Record "<uttid>:<word-count>" for later re-segmentation.
            wc_line = wc_line + ' ' + fields[0] + ':' + str(len(utt_words.split()))
        combined_out.write('utt ' + all_words)
        combined_out.write('\n')
        combined_out.close()
        wc_out.write(wc_line)
        wc_out.write('\n')
        wc_out.close()
        ref_out.close()


if __name__ == '__main__':
    main()
#!/usr/bin/perl
#
# Copyright 2018  Ewald Enzinger
#           2018  David Snyder
#
# Prepares the VoxCeleb1 test and train data directories in kaldi format.
# Usage: make_voxceleb1.pl /export/voxceleb1 data/

if (@ARGV != 2) {
  # Restored argument placeholders in the usage message.
  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
  exit(1);
}

($data_base, $out_dir) = @ARGV;
my $out_test_dir = "$out_dir/voxceleb1_test";
my $out_train_dir = "$out_dir/voxceleb1_train";

if (system("mkdir -p $out_test_dir") != 0) {
  die "Error making directory $out_test_dir";
}

if (system("mkdir -p $out_train_dir") != 0) {
  die "Error making directory $out_train_dir";
}

opendir my $dh, "$data_base/voxceleb1_wav" or die "Cannot open directory: $!";
my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
closedir $dh;

# Download the trial list and speaker metadata if not already present.
if (! -e "$data_base/voxceleb1_test.txt") {
  system("wget -O $data_base/voxceleb1_test.txt http://www.openslr.org/resources/49/voxceleb1_test.txt");
}

if (! -e "$data_base/vox1_meta.csv") {
  system("wget -O $data_base/vox1_meta.csv http://www.openslr.org/resources/49/vox1_meta.csv");
}

open(TRIAL_IN, "<", "$data_base/voxceleb1_test.txt") or die "Could not open the verification trials file $data_base/voxceleb1_test.txt";
open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv";
open(SPKR_TEST, ">", "$out_test_dir/utt2spk") or die "Could not open the output file $out_test_dir/utt2spk";
open(WAV_TEST, ">", "$out_test_dir/wav.scp") or die "Could not open the output file $out_test_dir/wav.scp";
open(SPKR_TRAIN, ">", "$out_train_dir/utt2spk") or die "Could not open the output file $out_train_dir/utt2spk";
open(WAV_TRAIN, ">", "$out_train_dir/wav.scp") or die "Could not open the output file $out_train_dir/wav.scp";
open(TRIAL_OUT, ">", "$out_test_dir/trials") or die "Could not open the output file $out_test_dir/trials";

# Map the anonymized VoxCeleb ids to the original speaker labels.
my %id2spkr = ();
# BUGFIX: the readline operator was missing ("while ()" is not valid Perl);
# read the metadata file line by line.
while (<META_IN>) {
  chomp;
  my ($vox_id, $spkr_id, $gender, $nation, $set) = split;
  $id2spkr{$vox_id} = $spkr_id;
}

# BUGFIX: was "my $test_spkrs = ();" (declares a scalar) while the code below
# uses the hash %test_spkrs; declare the hash.
my %test_spkrs = ();
# BUGFIX: readline operator restored here as well.
while (<TRIAL_IN>) {
  chomp;
  my ($tar_or_non, $path1, $path2) = split;

  # Create entry for left-hand side of trial
  my ($spkr_id1, $filename1) = split('/', $path1);
  my $rec_id1 = substr($filename1, 0, 11);
  my $segment1 = substr($filename1, 12, 7);
  my $utt_id1 = "$spkr_id1-$rec_id1-$segment1";
  $test_spkrs{$spkr_id1} = ();

  # Create entry for right-hand side of trial
  # BUGFIX: the original re-declared "my ($spkr_id, $filename)" in the same
  # scope; use distinct names for the two sides of the trial.
  my ($spkr_id2, $filename2) = split('/', $path2);
  my $rec_id2 = substr($filename2, 0, 11);
  my $segment2 = substr($filename2, 12, 7);
  my $utt_id2 = "$spkr_id2-$rec_id2-$segment2";
  $test_spkrs{$spkr_id2} = ();

  my $target = "nontarget";
  if ($tar_or_non eq "1") {
    $target = "target";
  }
  print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
}

foreach (@spkr_dirs) {
  my $spkr_id = $_;
  my $new_spkr_id = $spkr_id;
  # If we're using a newer version of VoxCeleb1, we need to "deanonymize"
  # the speaker labels.
  if (exists $id2spkr{$spkr_id}) {
    $new_spkr_id = $id2spkr{$spkr_id};
  }
  opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!";
  my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
  closedir $dh;
  foreach (@files) {
    my $filename = $_;
    my $rec_id = substr($filename, 0, 11);
    my $segment = substr($filename, 12, 7);
    my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
    my $utt_id = "$new_spkr_id-$rec_id-$segment";
    # Utterances of speakers that appear in the trial list go to the test
    # directory; everything else goes to train.
    if (exists $test_spkrs{$new_spkr_id}) {
      print WAV_TEST "$utt_id", " $wav", "\n";
      print SPKR_TEST "$utt_id", " $new_spkr_id", "\n";
    } else {
      print WAV_TRAIN "$utt_id", " $wav", "\n";
      print SPKR_TRAIN "$utt_id", " $new_spkr_id", "\n";
    }
  }
}

close(SPKR_TEST) or die;
close(WAV_TEST) or die;
close(SPKR_TRAIN) or die;
close(WAV_TRAIN) or die;
close(TRIAL_OUT) or die;
close(TRIAL_IN) or die;
close(META_IN) or die;

if (system(
  "utils/utt2spk_to_spk2utt.pl $out_test_dir/utt2spk >$out_test_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_test_dir";
}
system("env LC_COLLATE=C utils/fix_data_dir.sh $out_test_dir");
if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_test_dir") != 0) {
  die "Error validating directory $out_test_dir";
}

if (system(
  "utils/utt2spk_to_spk2utt.pl $out_train_dir/utt2spk >$out_train_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_train_dir";
}
system("env LC_COLLATE=C utils/fix_data_dir.sh $out_train_dir");
if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_train_dir") != 0) {
  die "Error validating directory $out_train_dir";
}
#!/usr/bin/perl
#
# Copyright 2018  Ewald Enzinger
#
# Prepares a VoxCeleb2 data directory in kaldi format, decoding the .m4a
# files on the fly with ffmpeg.
# Usage: make_voxceleb2.pl /export/voxceleb2 dev data/dev
#
# Note: This script requires ffmpeg to be installed and its location included in $PATH.

if (@ARGV != 3) {
  # Restored argument placeholders in the usage message.
  print STDERR "Usage: $0 <path-to-voxceleb2> <dataset> <path-to-data-dir>\n";
  print STDERR "e.g. $0 /export/voxceleb2 dev data/dev\n";
  exit(1);
}

# Check that ffmpeg is installed.
if (`which ffmpeg` eq "") {
  die "Error: this script requires that ffmpeg is installed.";
}

($data_base, $dataset, $out_dir) = @ARGV;

if ("$dataset" ne "dev" && "$dataset" ne "test") {
  die "dataset parameter must be 'dev' or 'test'!";
}

opendir my $dh, "$data_base/$dataset/aac" or die "Cannot open directory: $!";
my @spkr_dirs = grep {-d "$data_base/$dataset/aac/$_" && ! /^\.{1,2}$/} readdir($dh);
closedir $dh;

if (system("mkdir -p $out_dir") != 0) {
  die "Error making directory $out_dir";
}

open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";

# Walk the speaker/recording/file hierarchy and emit one wav.scp entry per
# .m4a, decoded to 16-bit PCM wav on a pipe.
foreach (@spkr_dirs) {
  my $spkr_id = $_;

  opendir my $dh, "$data_base/$dataset/aac/$spkr_id/" or die "Cannot open directory: $!";
  my @rec_dirs = grep {-d "$data_base/$dataset/aac/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh);
  closedir $dh;

  foreach (@rec_dirs) {
    my $rec_id = $_;

    opendir my $dh, "$data_base/$dataset/aac/$spkr_id/$rec_id/" or die "Cannot open directory: $!";
    my @files = map{s/\.[^.]+$//;$_}grep {/\.m4a$/} readdir($dh);
    closedir $dh;

    foreach (@files) {
      my $name = $_;
      my $wav = "ffmpeg -v 8 -i $data_base/$dataset/aac/$spkr_id/$rec_id/$name.m4a -f wav -acodec pcm_s16le -|";
      my $utt_id = "$spkr_id-$rec_id-$name";
      print WAV "$utt_id", " $wav", "\n";
      print SPKR "$utt_id", " $spkr_id", "\n";
    }
  }
}
close(SPKR) or die;
close(WAV) or die;

if (system(
  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
#!/bin/bash
# Copyright 2019  Ashish Arora, Yusuke Fujita
# Apache 2.0.
# This script takes a reference and hypothesis text file, and performs
# multispeaker scoring.

stage=0
cmd=queue.pl
num_spkrs=4
num_hyp_spk=4
datadir=dev_beamformit_dereverb
declare -a recording_id_array=("S02_U06" "S09_U06")
echo "$0 $@"  # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  # Restored argument placeholders in the usage message.
  echo "Usage: $0 <ref-file> <hyp-file> <out-dir>"
  echo "e.g.: $0 data/diarized/text \
    exp/chain_train_worn_simu_u400k_cleaned_rvb/tdnn1b_sp/decode_dev_xvector_sad/scoring_kaldi/penalty_1.0/10.txt \
    exp/chain_train_worn_simu_u400k_cleaned_rvb/tdnn1b_sp/decode_dev_xvector_sad/scoring_kaldi_multispeaker"
  echo "Options: "
  echo "  --cmd (utils/run.pl|utils/queue.pl)  # how to run jobs."
  exit 1;
fi

ref_file=$1
hyp_file=$2
out_dir=$3

output_dir=$out_dir/per_speaker_output
wer_dir=$out_dir/per_speaker_wer

# For dev and evaluation set, we take corresponding arrays
if [[ ${datadir} == *dev* ]]; then
  recording_id_array=("S02_U06" "S09_U06")
fi

if [[ ${datadir} == *eval* ]]; then
  recording_id_array=("S01_U06" "S21_U06")
fi

for f in $ref_file $hyp_file; do
  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

if [ $stage -le 0 ]; then
  echo "$0 generate per speaker per session file at paragraph level for the reference"
  echo "and per speaker per array file at paragraph level for the hypothesis"
  mkdir -p $output_dir $wer_dir
  local/wer_output_filter < $ref_file > $output_dir/ref_filt.txt
  local/wer_output_filter < $hyp_file > $output_dir/hyp_filt.txt
  local/get_ref_perspeaker_persession_file.py $output_dir/ref_filt.txt $output_dir
  local/get_hyp_perspeaker_perarray_file.py $output_dir/hyp_filt.txt $output_dir
fi

if [ $stage -le 1 ]; then
  if [ $num_hyp_spk -le 3 ]; then
    echo "$0 create dummy per speaker per array hypothesis files for if the"
    echo " predicted number of speakers by diarization is less than 4 "
    for recording_id in "${recording_id_array[@]}"; do
      for (( i=$num_hyp_spk+1; i<$num_spkrs+1; i++ )); do
        # BUGFIX: was ${dir}, which is never defined in this script; the dummy
        # files must live where stage 2 reads them, i.e. ${output_dir}.
        echo 'utt ' > ${output_dir}/hyp_${recording_id}_${i}_comb
      done
    done
  fi
fi

if [ $stage -le 2 ]; then
  echo "$0 calculate wer for each ref and hypothesis speaker"
  for recording_id in "${recording_id_array[@]}"; do
    # Try every (reference speaker, hypothesis speaker) pairing; the best
    # permutation is selected afterwards by get_best_error.py.
    for (( i=0; i<$((num_spkrs * num_spkrs)); i++ )); do
      ind_r=$((i / num_spkrs + 1))
      ind_h=$((i % num_spkrs + 1))
      sessionid="$(echo $recording_id | cut -d'_' -f1)"

      # compute WER with combined texts
      compute-wer --text --mode=present ark:${output_dir}/ref_${sessionid}_${ind_r}_comb \
        ark:${output_dir}/hyp_${recording_id}_${ind_h}_comb \
        > $wer_dir/wer_${recording_id}_r${ind_r}h${ind_h} 2>/dev/null
    done

    local/get_best_error.py $wer_dir $recording_id
  done
fi

if [ $stage -le 3 ]; then
  echo "$0 print best word error rate"
  echo "$0 it will print best wer for each recording and each array"
  cat $wer_dir/best_wer* > $wer_dir/all.txt
  cat $wer_dir/all.txt | local/print_dset_error.py $output_dir/recordinid_spkorder
fi

mkdir -p $wer_dir/wer_details $wer_dir/wer_details/log/
if [ $stage -le 4 ]; then
  echo "$0 generate per utterance wer details at utterance level"
  while read -r line;
  do
    recording_id=$(echo "$line" | cut -f1 -d ":")
    spkorder_str=$(echo "$line" | cut -f2 -d ":")
    sessionid=$(echo "$line" | cut -f1 -d "_")
    IFS='_' read -r -a spkorder_list <<< "$spkorder_str"
    IFS=" "
    ind_r=1
    for ind_h in "${spkorder_list[@]}"; do

      $cmd $wer_dir/wer_details/log/${recording_id}_r${ind_r}h${ind_h}_comb.log \
        align-text ark:${output_dir}/ref_${sessionid}_${ind_r}_comb ark:${output_dir}/hyp_${recording_id}_${ind_h}_comb ark:$output_dir/alignment_${sessionid}_r${ind_r}h${ind_h}.txt

      # split hypothesis texts along with reference utterances using word alignment of combined texts
      local/gen_aligned_hyp.py $output_dir/alignment_${sessionid}_r${ind_r}h${ind_h}.txt ${output_dir}/ref_wc_${sessionid}_${ind_r} > ${output_dir}/hyp_${recording_id}_r${ind_r}h${ind_h}_ref_segmentation

      ## compute per utterance alignments
      $cmd $wer_dir/wer_details/log/${recording_id}_r${ind_r}h${ind_h}_per_utt.log \
        cat ${output_dir}/hyp_${recording_id}_r${ind_r}h${ind_h}_ref_segmentation \| \
        align-text --special-symbol="'***'" ark:${output_dir}/ref_${sessionid}_${ind_r} ark:- ark,t:- \| \
        utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $wer_dir/wer_details/per_utt_${recording_id}_r${ind_r}h${ind_h} || exit 1

      $cmd $wer_dir/wer_details/log/${recording_id}_r${ind_r}h${ind_h}_ops.log \
        cat $wer_dir/wer_details/per_utt_${recording_id}_r${ind_r}h${ind_h} \| \
        utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \
        sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $wer_dir/wer_details/ops_${recording_id}_r${ind_r}h${ind_h} || exit 1;

      ind_r=$(( ind_r + 1 ))
    done
  done < $output_dir/recordinid_spkorder
  echo "$0 done generating per utterance wer details"
fi

echo "$0 done scoring"
#!/bin/bash
#
# Apache 2.0.

# This script applies sliding window CMVN and writes the features to disk.
#
# Although this kind of script isn't necessary in speaker recognition recipes,
# it can be helpful in the diarization recipes. The script
# diarization/nnet3/xvector/extract_xvectors.sh extracts x-vectors from very
# short (e.g., 1-2 seconds) segments. Therefore, in order to apply the sliding
# window CMVN in a meaningful way, it must be performed prior to performing
# the subsegmentation.

nj=40
cmd="run.pl"
stage=0
norm_vars=false
center=true
compress=true
cmn_window=300

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
  # Restored argument placeholders in the usage message.
  echo "Usage: $0 <in-data-dir> <out-data-dir> <feat-dir>"
  echo "e.g.: $0 data/train data/train_no_sil exp/make_xvector_features"
  echo "Options: "
  echo "  --nj <nj>                                        # number of parallel jobs"
  echo "  --cmd (utils/run.pl|utils/queue.pl)              # how to run jobs."
  echo "  --norm-vars <true|false>                         # If true, normalize variances in the sliding window cmvn"
  exit 1;
fi

data_in=$1
data_out=$2
dir=$3

name=`basename $data_in`

for f in $data_in/feats.scp ; do
  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

# Set various variables.
mkdir -p $dir/log
mkdir -p $data_out
featdir=$(utils/make_absolute.sh $dir)

if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then
  utils/create_split_dir.pl \
    /export/b{14,15,16,17}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_cmvn_feats/storage $featdir/storage
fi

for n in $(seq $nj); do
  # the next command does nothing unless $featdir/storage/ exists, see
  # utils/create_data_link.pl for more info.
  utils/create_data_link.pl $featdir/xvector_cmvn_feats_${name}.${n}.ark
done

cp $data_in/utt2spk $data_out/utt2spk
cp $data_in/spk2utt $data_out/spk2utt
cp $data_in/wav.scp $data_out/wav.scp
# BUGFIX: was "$data_in/segments $data_in/segments/vad.scp"; 'segments' is a
# file, so the second path could never exist -- vad.scp lives directly in
# the data directory.
for f in $data_in/segments $data_in/vad.scp ; do
  [ -f $f ] && cp $f $data_out/`basename $f`;
done

write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB"

sdata_in=$data_in/split$nj;
utils/split_data.sh $data_in $nj || exit 1;

# Honor the --norm-vars/--center options instead of hard-coding their values
# (the variables were declared above but silently ignored).
$cmd JOB=1:$nj $dir/log/create_xvector_cmvn_feats_${name}.JOB.log \
  apply-cmvn-sliding --norm-vars=$norm_vars --center=$center --cmn-window=$cmn_window \
  scp:${sdata_in}/JOB/feats.scp ark:- \| \
  copy-feats --compress=$compress $write_num_frames_opt ark:- \
  ark,scp:$featdir/xvector_cmvn_feats_${name}.JOB.ark,$featdir/xvector_cmvn_feats_${name}.JOB.scp || exit 1;

for n in $(seq $nj); do
  cat $featdir/xvector_cmvn_feats_${name}.$n.scp || exit 1;
done > ${data_out}/feats.scp || exit 1

for n in $(seq $nj); do
  cat $featdir/log/utt2num_frames.$n || exit 1;
done > $data_out/utt2num_frames || exit 1
rm $featdir/log/utt2num_frames.*

echo "$0: Succeeded creating xvector features for $name"
#!/bin/bash
#
# Apache 2.0.

# This script applies sliding window CMVN and removes silence frames. This
# is performed on the raw features prior to generating examples for training
# the x-vector system. Once the training examples are generated, the features
# created by this script can be removed.

nj=40
cmd="run.pl"
stage=0
norm_vars=false
center=true
compress=true
cmn_window=300

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
  # Restored argument placeholders in the usage message.
  echo "Usage: $0 <in-data-dir> <out-data-dir> <feat-dir>"
  echo "e.g.: $0 data/train data/train_no_sil exp/make_xvector_features"
  echo "Options: "
  echo "  --nj <nj>                                        # number of parallel jobs"
  echo "  --cmd (utils/run.pl|utils/queue.pl)              # how to run jobs."
  echo "  --norm-vars <true|false>                         # If true, normalize variances in the sliding window cmvn"
  exit 1;
fi

data_in=$1
data_out=$2
dir=$3

name=`basename $data_in`

for f in $data_in/feats.scp $data_in/vad.scp ; do
  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

# Set various variables.
mkdir -p $dir/log
mkdir -p $data_out
featdir=$(utils/make_absolute.sh $dir)

if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then
  utils/create_split_dir.pl \
    /export/b{14,15,16,17}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_feats/storage $featdir/storage
fi

for n in $(seq $nj); do
  # the next command does nothing unless $featdir/storage/ exists, see
  # utils/create_data_link.pl for more info.
  utils/create_data_link.pl $featdir/xvector_feats_${name}.${n}.ark
done

cp $data_in/utt2spk $data_out/utt2spk
cp $data_in/spk2utt $data_out/spk2utt
cp $data_in/wav.scp $data_out/wav.scp

write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB"

sdata_in=$data_in/split$nj;
utils/split_data.sh $data_in $nj || exit 1;

# Honor the --norm-vars/--center options instead of hard-coding their values
# (the variables were declared above but silently ignored).
$cmd JOB=1:$nj $dir/log/create_xvector_feats_${name}.JOB.log \
  apply-cmvn-sliding --norm-vars=$norm_vars --center=$center --cmn-window=$cmn_window \
  scp:${sdata_in}/JOB/feats.scp ark:- \| \
  select-voiced-frames ark:- scp,s,cs:${sdata_in}/JOB/vad.scp ark:- \| \
  copy-feats --compress=$compress $write_num_frames_opt ark:- \
  ark,scp:$featdir/xvector_feats_${name}.JOB.ark,$featdir/xvector_feats_${name}.JOB.scp || exit 1;

for n in $(seq $nj); do
  cat $featdir/xvector_feats_${name}.$n.scp || exit 1;
done > ${data_out}/feats.scp || exit 1

for n in $(seq $nj); do
  cat $featdir/log/utt2num_frames.$n || exit 1;
done > $data_out/utt2num_frames || exit 1
rm $featdir/log/utt2num_frames.*

echo "$0: Succeeded creating xvector features for $name"
#!/bin/bash
# Copyright      2018   David Snyder
#                2018   Johns Hopkins University (Author: Daniel Garcia-Romero)
#                2018   Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.

# This script trains the x-vector DNN. The recipe is similar to the one
# described in "Diarization is Hard: Some Experiences and Lessons Learned
# for the JHU Team in the Inaugural DIHARD Challenge" by Sell et al.

. ./cmd.sh
set -e

stage=1
train_stage=-1
use_gpu=true
remove_egs=false

data=data/train
nnet_dir=exp/xvector_nnet_1a/
egs_dir=exp/xvector_nnet_1a/egs

. ./path.sh
. ./cmd.sh
. ./utils/parse_options.sh

# NOTE(review): num_pdfs is computed here but never used below -- confirm
# whether it can be removed.
num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l)

# Now we create the nnet examples using sid/nnet3/xvector/get_egs.sh.
# The argument --num-repeats is related to the number of times a speaker
# repeats per archive. If it seems like you're getting too many archives
# (e.g., more than 200) try increasing the --frames-per-iter option. The
# arguments --min-frames-per-chunk and --max-frames-per-chunk specify the
# minimum and maximum length (in terms of number of frames) of the features
# in the examples.
#
# To make sense of the egs script, it may be necessary to put an "exit 1"
# command immediately after stage 3. Then, inspect
# exp/<egs-dir>/egs/temp/ranges.* . The ranges files specify the examples that
# will be created, and which archives they will be stored in. Each line of
# ranges.* has the following form (fields restored; verify against
# get_egs.sh):
#    <utt-id> <local-ark-indx> <global-ark-indx> <start-frame> <num-frames> <spkr-id>
# For example:
#    100304-f-sre2006-kacg-A 1 2 4079 881 23

# If you're satisfied with the number of archives (e.g., 50-150 archives is
# reasonable) and with the number of examples per speaker (e.g., 1000-5000
# is reasonable) then you can let the script continue to the later stages.
# Otherwise, try increasing or decreasing the --num-repeats option. You might
# need to fiddle with --frames-per-iter. Increasing this value decreases
# the number of archives and increases the number of examples per archive.
# Decreasing this value increases the number of archives, while decreasing the
# number of examples per archive.
if [ $stage -le 6 ]; then
  echo "$0: Getting neural network training egs";
  # dump egs.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $egs_dir/storage ]; then
    utils/create_split_dir.pl \
     /export/b{03,04,05,06}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/$egs_dir/storage $egs_dir/storage
  fi
  sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \
    --nj 8 \
    --stage 0 \
    --frames-per-iter 1000000000 \
    --frames-per-iter-diagnostic 500000 \
    --min-frames-per-chunk 200 \
    --max-frames-per-chunk 400 \
    --num-diagnostic-archives 3 \
    --num-repeats 40 \
    "$data" $egs_dir
fi

if [ $stage -le 7 ]; then
  echo "$0: creating neural net configs using the xconfig parser";
  num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}')
  feat_dim=$(cat $egs_dir/info/feat_dim)

  # This chunk-size corresponds to the maximum number of frames the
  # stats layer is able to pool over. In this script, it corresponds
  # to 4 seconds. If the input recording is greater than 4 seconds,
  # we will compute multiple xvectors from the same recording and average
  # to produce the final xvector.
  max_chunk_size=400

  # The smallest number of frames we're comfortable computing an xvector from.
  # Note that the hard minimum is given by the left and right context of the
  # frame-level layers.
  min_chunk_size=20
  mkdir -p $nnet_dir/configs
  # BUGFIX: restore the here-document redirection; "cat < file" tried to READ
  # the (nonexistent) xconfig instead of writing it.
  cat <<EOF > $nnet_dir/configs/network.xconfig
  # please note that it is important to have input layer with the name=input

  # The frame-level layers
  input dim=${feat_dim} name=input
  relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512
  relu-batchnorm-layer name=tdnn2 input=Append(-2,0,2) dim=512
  relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=512
  relu-batchnorm-layer name=tdnn4 dim=512
  relu-batchnorm-layer name=tdnn5 dim=1500

  # The stats pooling layer. Layers after this are segment-level.
  # In the config below, the first and last argument (0, and ${max_chunk_size})
  # means that we pool over an input segment starting at frame 0
  # and ending at frame ${max_chunk_size} or earlier. The other arguments (1:1)
  # mean that no subsampling is performed.
  stats-layer name=stats config=mean+stddev(0:1:1:${max_chunk_size})

  # This is where we usually extract the embedding (aka xvector) from.
  relu-batchnorm-layer name=tdnn6 dim=128 input=stats
  output-layer name=output include-log-softmax=true dim=${num_targets}
EOF

  steps/nnet3/xconfig_to_configs.py \
    --xconfig-file $nnet_dir/configs/network.xconfig \
    --config-dir $nnet_dir/configs/
  cp $nnet_dir/configs/final.config $nnet_dir/nnet.config

  # These three files will be used by sid/nnet3/xvector/extract_xvectors.sh
  echo "output-node name=output input=tdnn6.affine" > $nnet_dir/extract.config
  echo "$max_chunk_size" > $nnet_dir/max_chunk_size
  echo "$min_chunk_size" > $nnet_dir/min_chunk_size
fi

dropout_schedule='0,0@0.20,0.1@0.50,0'
srand=123
if [ $stage -le 8 ]; then
  steps/nnet3/train_raw_dnn.py --stage=$train_stage \
    --cmd="$train_cmd" \
    --trainer.optimization.proportional-shrink 10 \
    --trainer.optimization.momentum=0.5 \
    --trainer.optimization.num-jobs-initial=3 \
    --trainer.optimization.num-jobs-final=8 \
    --trainer.optimization.initial-effective-lrate=0.001 \
    --trainer.optimization.final-effective-lrate=0.0001 \
    --trainer.optimization.minibatch-size=64 \
    --trainer.srand=$srand \
    --trainer.max-param-change=2 \
    --trainer.num-epochs=3 \
    --trainer.dropout-schedule="$dropout_schedule" \
    --trainer.shuffle-buffer-size=1000 \
    --egs.frames-per-eg=1 \
    --egs.dir="$egs_dir" \
    --cleanup.remove-egs $remove_egs \
    --cleanup.preserve-model-interval=10 \
    --use-gpu=$use_gpu \
    --dir=$nnet_dir || exit 1;
fi

exit 0;
00000000000..c6b8121dab0 --- /dev/null +++ b/egs/chime6/s5_track2/local/prepare_data.sh @@ -0,0 +1,149 @@ +#!/bin/bash +# +# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal) +# Apache 2.0 + +# Begin configuration section. +mictype=worn # worn, ref or others +cleanup=true +train=true + +# End configuration section +. ./utils/parse_options.sh # accept options.. you can run this run.sh with the + +. ./path.sh + +echo >&2 "$0" "$@" +if [ $# -ne 3 ] ; then + echo >&2 "$0" "$@" + echo >&2 "$0: Error: wrong number of arguments" + echo -e >&2 "Usage:\n $0 [opts] " + echo -e >&2 "eg:\n $0 /corpora/chime5/audio/train /corpora/chime5/transcriptions/train data/train" + exit 1 +fi + +set -e -o pipefail + +adir=$1 +jdir=$2 +dir=$3 + +json_count=$(find -L $jdir -name "*.json" | wc -l) +wav_count=$(find -L $adir -name "*.wav" | wc -l) + +if [ "$json_count" -eq 0 ]; then + echo >&2 "We expect that the directory $jdir will contain json files." + echo >&2 "That implies you have supplied a wrong path to the data." + exit 1 +fi +if [ "$wav_count" -eq 0 ]; then + echo >&2 "We expect that the directory $adir will contain wav files." + echo >&2 "That implies you have supplied a wrong path to the data." + exit 1 +fi + +echo "$0: Converting transcription to text" + +mkdir -p $dir +for file in $jdir/*json; do + ./local/json2text.py --mictype $mictype $file +done | \ + sed -e "s/\[inaudible[- 0-9]*\]/[inaudible]/g" |\ + sed -e 's/ - / /g' |\ + sed -e 's/mm-/mm/g' > $dir/text.orig + +echo "$0: Creating datadir $dir for type=\"$mictype\"" + +if [ $mictype == "worn" ]; then + # convert the filenames to wav.scp format, use the basename of the file + # as a the wav.scp key, add .L and .R for left and right channel + # i.e. 
each file will have two entries (left and right channel) + find -L $adir -name "S[0-9]*_P[0-9]*.wav" | \ + perl -ne '{ + chomp; + $path = $_; + next unless $path; + @F = split "/", $path; + ($f = $F[@F-1]) =~ s/.wav//; + @F = split "_", $f; + print "${F[1]}_${F[0]}.L sox $path -t wav - remix 1 |\n"; + print "${F[1]}_${F[0]}.R sox $path -t wav - remix 2 |\n"; + }' | sort > $dir/wav.scp + + # generate the transcripts for both left and right channel + # from the original transcript in the form + # P09_S03-0006072-0006147 gimme the baker + # create left and right channel transcript + # P09_S03.L-0006072-0006147 gimme the baker + # P09_S03.R-0006072-0006147 gimme the baker + sed -n 's/ *$//; h; s/-/\.L-/p; g; s/-/\.R-/p' $dir/text.orig | sort > $dir/text +elif [ $mictype == "ref" ]; then + # fixed reference array + + # first get a text, which will be used to extract reference arrays + perl -ne 's/-/.ENH-/;print;' $dir/text.orig | sort > $dir/text + + find -L $adir | grep "\.wav" | sort > $dir/wav.flist + # following command provide the argument for grep to extract only reference arrays + #grep `cut -f 1 -d"-" $dir/text | awk -F"_" '{print $2 "_" $3}' | sed -e "s/\.ENH//" | sort | uniq | sed -e "s/^/ -e /" | tr "\n" " "` $dir/wav.flist > $dir/wav.flist2 + paste -d" " \ + <(awk -F "/" '{print $NF}' $dir/wav.flist | sed -e "s/\.wav/.ENH/") \ + $dir/wav.flist | sort > $dir/wav.scp +else + # array mic case + # convert the filenames to wav.scp format, use the basename of the file + # as a the wav.scp key + find -L $adir -name "*.wav" -ipath "*${mictype}*" |\ + perl -ne '$p=$_;chomp $_;@F=split "/";$F[$#F]=~s/\.wav//;print "$F[$#F] $p";' |\ + sort -u > $dir/wav.scp + + # convert the transcripts from + # P09_S03-0006072-0006147 gimme the baker + # to the per-channel transcripts + # P09_S03_U01_NOLOCATION.CH1-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH2-0006072-0006147 gimme the baker + # P09_S03_U01_NOLOCATION.CH3-0006072-0006147 gimme the baker + # 
P09_S03_U01_NOLOCATION.CH4-0006072-0006147 gimme the baker + perl -ne '$l=$_; + for($i=1; $i<=4; $i++) { + ($x=$l)=~ s/-/.CH\Q$i\E-/; + print $x;}' $dir/text.orig | sort > $dir/text + +fi +$cleanup && rm -f $dir/text.* $dir/wav.scp.* $dir/wav.flist + +# Prepare 'segments', 'utt2spk', 'spk2utt' +if [ $mictype == "worn" ]; then + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" \ + > $dir/segments +elif [ $mictype == "ref" ]; then + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" |\ + sed -e "s/ P.._/ /" > $dir/segments +else + cut -d" " -f 1 $dir/text | \ + awk -F"-" '{printf("%s %s %08.2f %08.2f\n", $0, $1, $2/100.0, $3/100.0)}' |\ + sed -e "s/_[A-Z]*\././2" |\ + sed -e 's/ P.._/ /' > $dir/segments +fi +cut -f 1 -d ' ' $dir/segments | \ + perl -ne 'chomp;$utt=$_;s/_.*//;print "$utt $_\n";' > $dir/utt2spk + +utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt + +if [ $train != 'true' ]; then + # For scoring the final system, we need the original utt2spk + # and text file. So we keep them with the extension .bak here + # so that they don't affect the validate_data_dir steps in + # the intermediate steps. + for file in text utt2spk spk2utt segments; do + mv $dir/$file $dir/$file.bak + done + + # For dev and eval data, prepare pseudo utt2spk. 
+ awk '{print $1, $1}' $dir/wav.scp > $dir/utt2spk + utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt +fi diff --git a/egs/chime6/s5_track2/local/prepare_dict.sh b/egs/chime6/s5_track2/local/prepare_dict.sh new file mode 120000 index 00000000000..ada30947463 --- /dev/null +++ b/egs/chime6/s5_track2/local/prepare_dict.sh @@ -0,0 +1 @@ +../../s5_track1/local/prepare_dict.sh \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/print_dset_error.py b/egs/chime6/s5_track2/local/print_dset_error.py new file mode 100755 index 00000000000..8d7988e2785 --- /dev/null +++ b/egs/chime6/s5_track2/local/print_dset_error.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +# Copyright 2019 Ashish Arora +# Apache 2.0. + +import sys, io +import string +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +spkorder_writer = open(sys.argv[1],'w', encoding='utf8') +total_words={} +total_errors={} +spk_order={} +total_errors_arrayid={} +total_words_arrayid={} + +output.write('WER for each recording: \n') +for line in infile: + toks = line.strip().split() + recordingid = toks[1] + total_words[recordingid] = toks[-5][:-1] + total_errors[recordingid] = toks[-4][:-1] + spk_order[recordingid] = toks[6][1] + '_' + toks[7][0] + '_' + toks[8][0] + '_' + toks[9][0] + arrayid=recordingid.strip().split('_')[1] + if arrayid not in total_errors_arrayid: + total_errors_arrayid[arrayid]=0 + total_words_arrayid[arrayid]=0 + total_errors_arrayid[arrayid]+=int(total_errors[recordingid]) + total_words_arrayid[arrayid]+=int(total_words[recordingid]) + wer = float(total_errors[recordingid])/float(total_words[recordingid])*100 + utt = "{0} {1} {2} {3} {4:5.2f}".format(recordingid, spk_order[recordingid], total_words[recordingid], total_errors[recordingid], wer) + output.write(utt + '\n') + spkorder_writer.write(recordingid + ':' + str(spk_order[recordingid]) + '\n') + + +output.write('WER for each array: \n') +for arrayid 
in sorted(total_errors_arrayid): + wer = float(total_errors_arrayid[arrayid])/float(total_words_arrayid[arrayid])*100 + utt = "{0} {1} {2} {3:5.2f}".format(arrayid, total_words_arrayid[arrayid], total_errors_arrayid[arrayid], wer) + output.write(utt + '\n') + diff --git a/egs/chime6/s5_track2/local/reverberate_lat_dir.sh b/egs/chime6/s5_track2/local/reverberate_lat_dir.sh new file mode 120000 index 00000000000..57302268f6d --- /dev/null +++ b/egs/chime6/s5_track2/local/reverberate_lat_dir.sh @@ -0,0 +1 @@ +../../s5_track1/local/reverberate_lat_dir.sh \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/run_beamformit.sh b/egs/chime6/s5_track2/local/run_beamformit.sh new file mode 120000 index 00000000000..832a16e3ba7 --- /dev/null +++ b/egs/chime6/s5_track2/local/run_beamformit.sh @@ -0,0 +1 @@ +../../s5_track1/local/run_beamformit.sh \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/run_ivector_common.sh b/egs/chime6/s5_track2/local/run_ivector_common.sh new file mode 120000 index 00000000000..df7fca84335 --- /dev/null +++ b/egs/chime6/s5_track2/local/run_ivector_common.sh @@ -0,0 +1 @@ +../../s5_track1/local/nnet3/run_ivector_common.sh \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/run_wpe.py b/egs/chime6/s5_track2/local/run_wpe.py new file mode 120000 index 00000000000..6621607c932 --- /dev/null +++ b/egs/chime6/s5_track2/local/run_wpe.py @@ -0,0 +1 @@ +../../s5_track1/local/run_wpe.py \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/run_wpe.sh b/egs/chime6/s5_track2/local/run_wpe.sh new file mode 120000 index 00000000000..187080e62e4 --- /dev/null +++ b/egs/chime6/s5_track2/local/run_wpe.sh @@ -0,0 +1 @@ +../../s5_track1/local/run_wpe.sh \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/score.sh b/egs/chime6/s5_track2/local/score.sh new file mode 120000 index 00000000000..6a200b42ed3 --- /dev/null +++ b/egs/chime6/s5_track2/local/score.sh @@ -0,0 +1 @@ 
+../steps/scoring/score_kaldi_wer.sh \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh b/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh new file mode 100755 index 00000000000..91d52b39269 --- /dev/null +++ b/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh @@ -0,0 +1,217 @@ +#!/bin/bash + +# Copyright 2016-17 Vimal Manohar +# 2017 Nagendra Kumar Goel +# Apache 2.0. + +# This script does nnet3-based speech activity detection given an input +# kaldi data directory and outputs a segmented kaldi data directory. +# This script can also do music detection and other similar segmentation +# using appropriate options such as --output-name output-music. + +set -e +set -o pipefail +set -u + +if [ -f ./path.sh ]; then . ./path.sh; fi + +affix= # Affix for the segmentation +nj=32 +cmd=queue.pl +stage=-1 + +# Feature options (Must match training) +mfcc_config=conf/mfcc_hires.conf +feat_affix= # Affix for the type of feature used + +output_name=output # The output node in the network +sad_name=sad # Base name for the directory storing the computed loglikes + # Can be music for music detection +segmentation_name=segmentation # Base name for the directory doing segmentation + # Can be segmentation_music for music detection + +# SAD network config +iter=final # Model iteration to use + +# Contexts must ideally match training for LSTM models, but +# may not necessarily for stats components +extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) +extra_right_context=0 +extra_left_context_initial=-1 +extra_right_context_final=-1 +frames_per_chunk=150 + +# Decoding options +graph_opts="--min-silence-duration=0.03 --min-speech-duration=0.3 --max-speech-duration=10.0" +acwt=0.3 + +# These _in__weight represent the fraction of probability +# to transfer to class. +# e.g. 
--speech-in-sil-weight=0.0 --garbage-in-sil-weight=0.0 --sil-in-speech-weight=0.0 --garbage-in-speech-weight=0.3 +transform_probs_opts="" + +# Postprocessing options +segment_padding=0.2 # Duration (in seconds) of padding added to segments +min_segment_dur=0 # Minimum duration (in seconds) required for a segment to be included + # This is before any padding. Segments shorter than this duration will be removed. + # This is an alternative to --min-speech-duration above. +merge_consecutive_max_dur=0 # Merge consecutive segments as long as the merged segment is no longer than this many + # seconds. The segments are only merged if their boundaries are touching. + # This is after padding by --segment-padding seconds. + # 0 means do not merge. Use 'inf' to not limit the duration. + +echo $* + +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "This script does nnet3-based speech activity detection given an input kaldi " + echo "data directory and outputs an output kaldi data directory." + echo "See script for details of the options to be supplied." + echo "Usage: $0 " + echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev exp/nnet3_sad_snr/nnet_tdnn_j_n4 \\" + echo " mfcc_hires exp/segmentation_sad_snr/nnet_tdnn_j_n4 data/ami_sdm1_dev" + echo "" + echo "Options: " + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --nj # number of parallel jobs to run." + echo " --stage # stage to do partial re-run from." + echo " --convert-data-dir-to-whole # If true, the input data directory is " + echo " # first converted to whole data directory (i.e. whole recordings) " + echo " # and segmentation is done on that." + echo " # If false, then the original segments are " + echo " # retained and they are split into sub-segments." 
+ echo " --output-name # The output node in the network" + echo " --extra-left-context # Set to some large value, typically 40 for LSTM (must match training)" + echo " --extra-right-context # For BLSTM or statistics pooling" + exit 1 +fi + +src_data_dir=$1 # The input data directory that needs to be segmented. + # If convert_data_dir_to_whole is true, any segments in that will be ignored. +sad_nnet_dir=$2 # The SAD neural network +mfcc_dir=$3 # The directory to store the features +dir=$4 # Work directory +data_dir=$5 # The output data directory will be ${data_dir}_seg + +affix=${affix:+_$affix} +feat_affix=${feat_affix:+_$feat_affix} + +data_id=`basename $data_dir` +sad_dir=${dir}/${sad_name}${affix}_${data_id}${feat_affix} +seg_dir=${dir}/${segmentation_name}${affix}_${data_id}${feat_affix} +test_data_dir=data/${data_id}${feat_affix} + +############################################################################### +## Forward pass through the network network and dump the log-likelihoods. +############################################################################### + +frame_subsampling_factor=1 +if [ -f $sad_nnet_dir/frame_subsampling_factor ]; then + frame_subsampling_factor=$(cat $sad_nnet_dir/frame_subsampling_factor) +fi + +mkdir -p $dir +if [ $stage -le 1 ]; then + if [ "$(readlink -f $sad_nnet_dir)" != "$(readlink -f $dir)" ]; then + cp $sad_nnet_dir/cmvn_opts $dir || exit 1 + fi + + ######################################################################## + ## Initialize neural network for decoding using the output $output_name + ######################################################################## + + if [ ! -z "$output_name" ] && [ "$output_name" != output ]; then + $cmd $dir/log/get_nnet_${output_name}.log \ + nnet3-copy --edits="rename-node old-name=$output_name new-name=output" \ + $sad_nnet_dir/$iter.raw $dir/${iter}_${output_name}.raw || exit 1 + iter=${iter}_${output_name} + else + if ! 
diff $sad_nnet_dir/$iter.raw $dir/$iter.raw; then + cp $sad_nnet_dir/$iter.raw $dir/ + fi + fi + + steps/nnet3/compute_output.sh --nj $nj --cmd "$cmd" \ + --iter ${iter} \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk $frames_per_chunk --apply-exp true \ + --frame-subsampling-factor $frame_subsampling_factor \ + ${test_data_dir} $dir $sad_dir || exit 1 +fi + +############################################################################### +## Prepare FST we search to make speech/silence decisions. +############################################################################### + +utils/data/get_utt2dur.sh --nj $nj --cmd "$cmd" $test_data_dir || exit 1 +frame_shift=$(utils/data/get_frame_shift.sh $test_data_dir) || exit 1 + +graph_dir=${dir}/graph_${output_name} +if [ $stage -le 2 ]; then + mkdir -p $graph_dir + + # 1 for silence and 2 for speech + cat < $graph_dir/words.txt + 0 +silence 1 +speech 2 +EOF + + $cmd $graph_dir/log/make_graph.log \ + steps/segmentation/internal/prepare_sad_graph.py $graph_opts \ + --frame-shift=$(perl -e "print $frame_shift * $frame_subsampling_factor") - \| \ + fstcompile --isymbols=$graph_dir/words.txt --osymbols=$graph_dir/words.txt '>' \ + $graph_dir/HCLG.fst +fi + +############################################################################### +## Do Viterbi decoding to create per-frame alignments. +############################################################################### + +post_vec=$sad_nnet_dir/post_${output_name}.vec +if [ ! -f $sad_nnet_dir/post_${output_name}.vec ]; then + if [ ! -f $sad_nnet_dir/post_${output_name}.txt ]; then + echo "$0: Could not find $sad_nnet_dir/post_${output_name}.vec. 
" + echo "Re-run the corresponding stage in the training script possibly " + echo "with --compute-average-posteriors=true or compute the priors " + echo "from the training labels" + exit 1 + else + post_vec=$sad_nnet_dir/post_${output_name}.txt + fi +fi + +mkdir -p $seg_dir +if [ $stage -le 3 ]; then + steps/segmentation/internal/get_transform_probs_mat.py \ + --priors="$post_vec" $transform_probs_opts > $seg_dir/transform_probs.mat + + steps/segmentation/decode_sad.sh --acwt $acwt --cmd "$cmd" \ + --nj $nj \ + --transform "$seg_dir/transform_probs.mat" \ + $graph_dir $sad_dir $seg_dir +fi + +############################################################################### +## Post-process segmentation to create kaldi data directory. +############################################################################### + +if [ $stage -le 4 ]; then + steps/segmentation/post_process_sad_to_segments.sh \ + --segment-padding $segment_padding --min-segment-dur $min_segment_dur \ + --merge-consecutive-max-dur $merge_consecutive_max_dur \ + --cmd "$cmd" --frame-shift $(perl -e "print $frame_subsampling_factor * $frame_shift") \ + ${test_data_dir} ${seg_dir} ${seg_dir} +fi + +if [ $stage -le 5 ]; then + utils/data/subsegment_data_dir.sh ${test_data_dir} ${seg_dir}/segments \ + ${data_dir}_seg +fi + +echo "$0: Created output segmented kaldi data directory in ${data_dir}_seg" +exit 0 diff --git a/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh b/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh new file mode 100755 index 00000000000..5701424869a --- /dev/null +++ b/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh @@ -0,0 +1,140 @@ +#!/bin/bash + +# Copyright 2017 Nagendra Kumar Goel +# 2018 Vimal Manohar +# Apache 2.0 + +# This is a script to train a TDNN for speech activity detection (SAD) +# using LSTM for long-context information. 
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= + +chunk_width=20 + +extra_left_context=60 +extra_right_context=10 +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=1 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=true +max_param_change=0.2 # Small max-param change for small network +dropout_schedule='0,0@0.20,0.1@0.50,0' + +egs_dir= +nj=40 + +dir= +affix=1a + +data_dir= +targets_dir= + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +set -o pipefail +set -u + +if [ -z "$dir" ]; then + dir=exp/segmentation_1a/tdnn_lstm_asr_sad +fi +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/cmvn_opts + +if [ $stage -le 5 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$data_dir/feats.scp -` name=input + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 input=lda dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim decay-time=20 delay=-3 dropout-proportion=0.0 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) add-log-stddev=true dim=$relu_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim decay-time=20 delay=-3 dropout-proportion=0.0 + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + + output-layer name=output include-log-softmax=true dim=3 learning-rate-factor=0.1 input=tdnn5 +EOF + steps/nnet3/xconfig_to_configs.py 
--xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ + + cat <> $dir/configs/vars +num_targets=3 +EOF +fi + +if [ $stage -le 6 ]; then + num_utts=`cat $data_dir/utt2spk | wc -l` + # Set num_utts_subset for diagnostics to a reasonable value + # of max(min(0.005 * num_utts, 300), 12) + num_utts_subset=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 300 ? 300 : ($n < 12 ? 12 : $n))' $num_utts` + + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj $nj \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=true \ + --feat-dir=$data_dir \ + --targets-scp="$targets_dir/targets.scp" \ + --egs.opts="--frame-subsampling-factor 3 --num-utts-subset $num_utts_subset" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 7 ]; then + # Use a subset to compute prior over the output targets + $train_cmd $dir/log/get_priors.log \ + matrix-sum-rows "scp:utils/subset_scp.pl --quiet 1000 
$targets_dir/targets.scp |" \ + ark:- \| vector-sum --binary=false ark:- $dir/post_output.vec || exit 1 + + echo 3 > $dir/frame_subsampling_factor +fi diff --git a/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh b/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh new file mode 100755 index 00000000000..bb985462f49 --- /dev/null +++ b/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +# Copyright 2017 Nagendra Kumar Goel +# 2018 Vimal Manohar +# Apache 2.0 + +# This is a script to train a TDNN for speech activity detection (SAD) +# using statistics pooling for long-context information. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= + +chunk_width=20 + +# The context is chosen to be around 1 second long. The context at test time +# is expected to be around the same. +extra_left_context=79 +extra_right_context=21 + +relu_dim=256 + +# training options +num_epochs=1 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=true +max_param_change=0.2 # Small max-param change for small network + +egs_dir= +nj=40 + +dir= +affix=1a + +data_dir= +targets_dir= + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +set -o pipefail +set -u + +if [ -z "$dir" ]; then + dir=exp/segmentation_1a/tdnn_stats_sad +fi +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/cmvn_opts + +if [ $stage -le 5 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$data_dir/feats.scp -` name=input + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 input=lda dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + stats-layer name=tdnn3_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn3_stats) add-log-stddev=true dim=$relu_dim + stats-layer name=tdnn4_stats config=mean+count(-108:6:18:108) + relu-renorm-layer name=tdnn5 input=Append(tdnn4@-12,tdnn4@0,tdnn4@12,tdnn4@24,tdnn4_stats) dim=$relu_dim + + output-layer name=output include-log-softmax=true dim=3 learning-rate-factor=0.1 input=tdnn5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ + + cat <> $dir/configs/vars +num_targets=3 +EOF +fi + +if [ $stage -le 6 ]; then + num_utts=`cat $data_dir/utt2spk | wc -l` + # Set num_utts_subset for diagnostics to a reasonable value + # of max(min(0.005 * num_utts, 300), 12) + num_utts_subset=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 300 ? 300 : ($n < 12 ? 
12 : $n))' $num_utts` + + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts=$cmvn_opts \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj $nj \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=true \ + --feat-dir=$data_dir \ + --targets-scp="$targets_dir/targets.scp" \ + --egs.opts="--frame-subsampling-factor 3 --num-utts-subset $num_utts_subset" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 7 ]; then + # Use a subset to compute prior over the output targets + #$train_cmd $dir/log/get_priors.log \ + # matrix-sum-rows "scp:utils/subset_scp.pl --quiet 1000 $targets_dir/targets.scp |" \ + # ark:- \| vector-sum --binary=false ark:- $dir/post_output.vec || exit 1 + + # Since the train data is individual microphones, while the dev and + # eval are beamformed, it is likely that the train contains a much + # higher ratio of silences. So using priors computed from the train + # data may miss a lot of speech in the dev/eval sets. Hence we manually + # tune the prior on the dev set. 
+ # With the following prior, the SAD system results are: + # Dev (using -c 0.25) + # MISSED SPEECH = 1188.59 secs ( 3.3 percent of scored time) + # FALARM SPEECH = 539.37 secs ( 1.5 percent of scored time) + echo "[ 30 2 1 ]" > $dir/post_output.vec || exit 1 + + echo 3 > $dir/frame_subsampling_factor +fi + diff --git a/egs/chime6/s5_track2/local/train_diarizer.sh b/egs/chime6/s5_track2/local/train_diarizer.sh new file mode 100755 index 00000000000..71918e7cabc --- /dev/null +++ b/egs/chime6/s5_track2/local/train_diarizer.sh @@ -0,0 +1,186 @@ +#!/bin/bash +# Copyright +# 2019 David Snyder +# Apache 2.0. +# +# This script is based on the run.sh script in the Voxceleb v2 recipe. +# It trains an x-vector DNN for diarization. + +mfccdir=`pwd`/mfcc +vaddir=`pwd`/mfcc + +voxceleb1_root=/export/corpora/VoxCeleb1 +voxceleb2_root=/export/corpora/VoxCeleb2 +data_dir=train_worn_simu_u400k +model_dir=exp/xvector_nnet_1a + +stage=0 +train_stage=-1 + +. ./cmd.sh + +if [ -f ./path.sh ]; then . ./path.sh; fi +set -e -u -o pipefail +. utils/parse_options.sh + +if [ $# -ne 0 ]; then + exit 1 +fi + +if [ $stage -le 0 ]; then + echo "$0: preparing voxceleb 2 data" + local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train + local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test + + echo "$0: preparing voxceleb 1 data (see comments if this step fails)" + # The format of the voxceleb 1 corpus has changed several times since it was + # released. Therefore, our dataprep scripts may or may not fail depending + # on the version of the corpus you obtained. 
+ # If you downloaded the corpus soon after it was first released, this + # version of the dataprep script might work: + local/make_voxceleb1.pl $voxceleb1_root data/voxceleb1 + # However, if you've downloaded the corpus recently, you may need to use the + # the following scripts instead: + #local/make_voxceleb1_v2.pl $voxceleb1_root dev data/voxceleb1_train + #local/make_voxceleb1_v2.pl $voxceleb1_root test data/voxceleb1_test + + # We should now have about 7,351 speakers and 1,277,503 utterances. + utils/combine_data.sh data/voxceleb data/voxceleb2_train data/voxceleb2_test +fi + +if [ $stage -le 1 ]; then + echo "$0: preparing features for training data (voxceleb 1 + 2)" + steps/make_mfcc.sh --write-utt2num-frames true \ + --mfcc-config conf/mfcc_hires.conf --nj 40 --cmd "$train_cmd" \ + data/voxceleb exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/voxceleb + # Note that we apply CMN to the MFCCs and write these to the disk. These + # features will later be used to train the x-vector DNN. +fi + +# In this section, we augment the voxceleb data with reverberation. +# Note that we can probably improve the x-vector DNN if we include +# augmentations from the nonspeech regions of the Chime 6 training +# dataset. +if [ $stage -le 2 ]; then + echo "$0: applying augmentation to x-vector training data (just reverb for now)" + frame_shift=0.01 + awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' data/voxceleb/utt2num_frames > data/voxceleb/reco2dur + + if [ ! 
-d "RIRS_NOISES" ]; then + echo "$0: downloading simulated room impulse response dataset" + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + + # Make a version with reverberated speech + rvb_opts=() + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") + + # Make a reverberated version of the training data. Note that we don't add any + # additive noise here. + steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --speech-rvb-probability 1 \ + --pointsource-noise-addition-probability 0 \ + --isotropic-noise-addition-probability 0 \ + --num-replications 1 \ + --source-sampling-rate 16000 \ + data/voxceleb data/voxceleb_reverb + utils/copy_data_dir.sh --utt-suffix "-reverb" data/voxceleb_reverb data/voxceleb_reverb.new + rm -rf data/voxceleb_reverb + mv data/voxceleb_reverb.new data/voxceleb_reverb +fi + +if [ $stage -le 3 ]; then + echo "$0: making MFCCs for augmented training data" + # Make MFCCs for the augmented data. Note that we do not compute a new + # vad.scp file here. Instead, we use the vad.scp from the clean version of + # the list. + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 --cmd "$train_cmd" \ + data/voxceleb_reverb exp/make_mfcc $mfccdir + # Combine the clean and augmented training data. This is now roughly + # double the size of the original clean list. + utils/combine_data.sh data/voxceleb_combined data/voxceleb_reverb data/voxceleb +fi + +# Now we prepare the features to generate examples for xvector training. +if [ $stage -le 4 ]; then + # This script applies CMVN and removes nonspeech frames. Note that this is somewhat + # wasteful, as it roughly doubles the amount of training data on disk. 
After + # creating voxceleb examples, this can be removed. + echo "$0: preparing features to train x-vector DNN" + local/nnet3/xvector/prepare_feats.sh --nj 40 --cmd "$train_cmd" \ + data/voxceleb_combined data/voxceleb_combined_cmn exp/voxceleb_combined_cmn + utils/fix_data_dir.sh data/voxceleb_combined_cmn +fi + +if [ $stage -le 5 ]; then + # Now, we need to remove features that are too short after removing silence + # frames. We want at least 4s (400 frames) per utterance. + min_len=400 + mv data/voxceleb_combined_cmn/utt2num_frames data/voxceleb_combined_cmn/utt2num_frames.bak + awk -v min_len=${min_len} '$2 > min_len {print $1, $2}' data/voxceleb_combined_cmn/utt2num_frames.bak > data/voxceleb_combined_cmn/utt2num_frames + utils/filter_scp.pl data/voxceleb_combined_cmn/utt2num_frames data/voxceleb_combined_cmn/utt2spk > data/voxceleb_combined_cmn/utt2spk.new + mv data/voxceleb_combined_cmn/utt2spk.new data/voxceleb_combined_cmn/utt2spk + utils/fix_data_dir.sh data/voxceleb_combined_cmn + + # We also want several utterances per speaker. Now we'll throw out speakers + # with fewer than 8 utterances. + min_num_utts=8 + awk '{print $1, NF-1}' data/voxceleb_combined_cmn/spk2utt > data/voxceleb_combined_cmn/spk2num + awk -v min_num_utts=${min_num_utts} '$2 >= min_num_utts {print $1, $2}' data/voxceleb_combined_cmn/spk2num | utils/filter_scp.pl - data/voxceleb_combined_cmn/spk2utt > data/voxceleb_combined_cmn/spk2utt.new + mv data/voxceleb_combined_cmn/spk2utt.new data/voxceleb_combined_cmn/spk2utt + utils/spk2utt_to_utt2spk.pl data/voxceleb_combined_cmn/spk2utt > data/voxceleb_combined_cmn/utt2spk + + utils/filter_scp.pl data/voxceleb_combined_cmn/utt2spk data/voxceleb_combined_cmn/utt2num_frames > data/voxceleb_combined_cmn/utt2num_frames.new + mv data/voxceleb_combined_cmn/utt2num_frames.new data/voxceleb_combined_cmn/utt2num_frames + + utils/fix_data_dir.sh data/voxceleb_combined_cmn +fi + +# Stages 6 through 8 are handled in run_xvector.sh. 
+# This script trains the x-vector DNN on the augmented voxceleb data. +local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage $train_stage \ + --data data/voxceleb_combined_cmn --nnet-dir $model_dir \ + --egs-dir $model_dir/egs + +if [ $stage -le 9 ]; then + echo "$0: preparing a subset of Chime 6 training data to train PLDA model" + utils/subset_data_dir.sh ${data_dir} 100000 data/plda_train + steps/make_mfcc.sh --write-utt2num-frames true \ + --mfcc-config conf/mfcc_hires.conf --nj 40 --cmd "$train_cmd" \ + data/plda_train exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/plda_train + local/nnet3/xvector/prepare_feats.sh --nj 40 --cmd "$train_cmd" \ + data/plda_train data/plda_train_cmn exp/plda_train_cmn + if [ -f data/plda_train/segments ]; then + cp data/plda_train/segments data/plda_train_cmn/ + fi +fi + +if [ $stage -le 10 ]; then + echo "$0: extracting x-vector for PLDA training data" + utils/fix_data_dir.sh data/plda_train_cmn + diarization/nnet3/xvector/extract_xvectors.sh --cmd "$train_cmd --mem 10G" \ + --nj 40 --window 3.0 --period 10.0 --min-segment 1.5 --apply-cmn false \ + --hard-min true $model_dir \ + data/plda_train_cmn $model_dir/xvectors_plda_train +fi + +# Train PLDA models +if [ $stage -le 11 ]; then + echo "$0: training PLDA model" + $train_cmd $model_dir/xvectors_plda_train/log/plda.log \ + ivector-compute-plda ark:$model_dir/xvectors_plda_train/spk2utt \ + "ark:ivector-subtract-global-mean \ + scp:$model_dir/xvectors_plda_train/xvector.scp ark:- \ + | transform-vec $model_dir/xvectors_plda_train/transform.mat ark:- ark:- \ + | ivector-normalize-length ark:- ark:- |" \ + $model_dir/xvectors_plda_train/plda || exit 1; + cp $model_dir/xvectors_plda_train/plda $model_dir/ + cp $model_dir/xvectors_plda_train/transform.mat $model_dir/ + cp $model_dir/xvectors_plda_train/mean.vec $model_dir/ +fi diff --git a/egs/chime6/s5_track2/local/train_lms_srilm.sh b/egs/chime6/s5_track2/local/train_lms_srilm.sh new file mode 120000 index 
00000000000..a7666f6cded --- /dev/null +++ b/egs/chime6/s5_track2/local/train_lms_srilm.sh @@ -0,0 +1 @@ +../../s5_track1/local/train_lms_srilm.sh \ No newline at end of file diff --git a/egs/chime6/s5_track2/local/train_sad.sh b/egs/chime6/s5_track2/local/train_sad.sh new file mode 100755 index 00000000000..e12a0cad694 --- /dev/null +++ b/egs/chime6/s5_track2/local/train_sad.sh @@ -0,0 +1,155 @@ +#!/bin/bash + +# Copyright 2017 Nagendra Kumar Goel +# 2017 Vimal Manohar +# 2019 Desh Raj +# Apache 2.0 + +# This script is based on local/run_asr_segmentation.sh script in the +# Aspire recipe. It demonstrates nnet3-based speech activity detection for +# segmentation. +# This script: +# 1) Prepares targets (per-frame labels) for a subset of training data +# using GMM models +# 2) Trains TDNN+Stats or TDNN+LSTM neural network using the targets +# 3) Demonstrates using the SAD system to get segments of dev data + +lang=data/lang # Must match the one used to train the models +lang_test=data/lang_test # Lang directory for decoding. + +data_dir= +test_sets= +# Model directory used to align the $data_dir to get target labels for training +# SAD. This should typically be a speaker-adapted system. +sat_model_dir= +# Model directory used to decode the whole-recording version of the $data_dir to +# get target labels for training SAD. This should typically be a +# speaker-independent system like LDA+MLLT system. +model_dir= +graph_dir= # Graph for decoding whole-recording version of $data_dir. + # If not provided, a new one will be created using $lang_test + +# List of weights on labels obtained from alignment; +# labels obtained from decoding; and default labels in out-of-segment regions +merge_weights=1.0,0.1,0.5 + +prepare_targets_stage=-10 +nstage=-10 +train_stage=-10 +stage=0 +nj=50 +reco_nj=40 + +# test options +test_nj=10 + +. ./cmd.sh +. ./conf/sad.conf + +if [ -f ./path.sh ]; then . ./path.sh; fi + +set -e -u -o pipefail +.
utils/parse_options.sh + +if [ $# -ne 0 ]; then + exit 1 +fi + +dir=exp/segmentation${affix} +sad_work_dir=exp/sad${affix}_${nnet_type}/ +sad_nnet_dir=$dir/tdnn_${nnet_type}_sad_1a + +mkdir -p $dir +mkdir -p ${sad_work_dir} + +# See $lang/phones.txt and decide which should be garbage +garbage_phones="laughs inaudible" +silence_phones="sil spn noise" + +for p in $garbage_phones; do + for a in "" "_B" "_E" "_I" "_S"; do + echo "$p$a" + done +done > $dir/garbage_phones.txt + +for p in $silence_phones; do + for a in "" "_B" "_E" "_I" "_S"; do + echo "$p$a" + done +done > $dir/silence_phones.txt + +if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \ + steps/segmentation/internal/verify_phones_list.py $lang/phones.txt; then + echo "$0: Invalid $dir/{silence,garbage}_phones.txt" + exit 1 +fi + +# The training data may already be segmented, so we first prepare +# a "whole" training data (not segmented) for training the SAD +# system. + +whole_data_dir=${data_dir}_whole +whole_data_id=$(basename $whole_data_dir) + +if [ $stage -le 0 ]; then + utils/data/convert_data_dir_to_whole.sh $data_dir $whole_data_dir +fi + +############################################################################### +# Extract features for the whole data directory. We extract 13-dim MFCCs to +# generate targets using the GMM system, and 40-dim MFCCs to train the NN-based +# SAD. 
+############################################################################### +if [ $stage -le 1 ]; then + steps/make_mfcc.sh --nj $reco_nj --cmd "$train_cmd" --write-utt2num-frames true \ + --mfcc-config conf/mfcc.conf \ + $whole_data_dir exp/make_mfcc/${whole_data_id} + steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/${whole_data_id} + utils/fix_data_dir.sh $whole_data_dir + + utils/copy_data_dir.sh $whole_data_dir ${whole_data_dir}_hires + steps/make_mfcc.sh --nj $reco_nj --cmd "$train_cmd" --write-utt2num-frames true \ + --mfcc-config conf/mfcc_hires.conf \ + ${whole_data_dir}_hires exp/make_mfcc/${whole_data_id}_hires + steps/compute_cmvn_stats.sh ${whole_data_dir}_hires exp/make_mfcc/${whole_data_id}_hires + utils/fix_data_dir.sh ${whole_data_dir}_hires +fi + +############################################################################### +# Prepare SAD targets for recordings +############################################################################### +targets_dir=$dir/${whole_data_id}_combined_targets_sub3 +if [ $stage -le 2 ]; then + steps/segmentation/prepare_targets_gmm.sh --stage $prepare_targets_stage \ + --train-cmd "$train_cmd" --decode-cmd "$decode_cmd" \ + --nj $nj --reco-nj $reco_nj --lang-test $lang \ + --garbage-phones-list $dir/garbage_phones.txt \ + --silence-phones-list $dir/silence_phones.txt \ + --merge-weights "$merge_weights" \ + --remove-mismatch-frames false \ + --graph-dir "$graph_dir" \ + $lang $data_dir $whole_data_dir $sat_model_dir $model_dir $dir +fi + +############################################################################### +# Train a neural network for SAD +############################################################################### +if [ $stage -le 3 ]; then + if [ $nnet_type == "stats" ]; then + # Train a STATS-pooling network for SAD + local/segmentation/tuning/train_stats_sad_1a.sh \ + --stage $nstage --train-stage $train_stage \ + --targets-dir ${targets_dir} \ + --data-dir ${whole_data_dir}_hires 
--affix "1a" || exit 1 + + elif [ $nnet_type == "lstm" ]; then + # Train a TDNN+LSTM network for SAD + local/segmentation/tuning/train_lstm_sad_1a.sh \ + --stage $nstage --train-stage $train_stage \ + --targets-dir ${targets_dir} \ + --data-dir ${whole_data_dir}_hires --affix "1a" || exit 1 + + fi +fi + +exit 0; diff --git a/egs/chime6/s5_track2/local/wer_output_filter b/egs/chime6/s5_track2/local/wer_output_filter new file mode 120000 index 00000000000..12a6c616d3d --- /dev/null +++ b/egs/chime6/s5_track2/local/wer_output_filter @@ -0,0 +1 @@ +../../s5_track1/local/wer_output_filter \ No newline at end of file diff --git a/egs/chime6/s5_track2/path.sh b/egs/chime6/s5_track2/path.sh new file mode 100644 index 00000000000..c2526194bee --- /dev/null +++ b/egs/chime6/s5_track2/path.sh @@ -0,0 +1,7 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C + diff --git a/egs/chime6/s5_track2/run.sh b/egs/chime6/s5_track2/run.sh new file mode 100755 index 00000000000..1350b8e14d5 --- /dev/null +++ b/egs/chime6/s5_track2/run.sh @@ -0,0 +1,296 @@ +#!/bin/bash +# +# Chime-6 Track 2 baseline. Based mostly on the Chime-5 recipe, with the exception +# that we are required to perform speech activity detection and speaker +# diarization before ASR, since we do not have access to the oracle SAD and +# diarization labels. +# +# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal) +# 2019 Desh Raj, David Snyder, Ashish Arora +# Apache 2.0 + +# Begin configuration section. 
+nj=50 +decode_nj=20 +stage=0 +nnet_stage=-10 +sad_stage=0 +diarizer_stage=0 +decode_stage=1 +enhancement=beamformit # for a new enhancement method, + # change this variable and decode stage +decode_only=false +num_data_reps=4 +snrs="20:10:15:5:0" +foreground_snrs="20:10:15:5:0" +background_snrs="20:10:15:5:0" +# End configuration section +. ./utils/parse_options.sh + +. ./cmd.sh +. ./path.sh + +if [ $decode_only == "true" ]; then + stage=18 +fi + +set -e # exit on error + +# chime5 main directory path +# please change the path accordingly +chime5_corpus=/export/corpora4/CHiME5 +# chime6 data directories, which are generated from ${chime5_corpus}, +# to synchronize audio files across arrays and modify the annotation (JSON) file accordingly +chime6_corpus=${PWD}/CHiME6 +json_dir=${chime6_corpus}/transcriptions +audio_dir=${chime6_corpus}/audio + +# training and test data +train_set=train_worn_simu_u400k +sad_train_set=train_worn_u400k +test_sets="dev_${enhancement}_dereverb eval_${enhancement}_dereverb" + +# This script also needs the phonetisaurus g2p, srilm, beamformit +./local/check_tools.sh || exit 1; + +########################################################################### +# We first generate the synchronized audio files across arrays and +# corresponding JSON files. Note that this requires sox v14.4.2, +# which is installed via miniconda in ./local/check_tools.sh +########################################################################### + +if [ $stage -le 0 ]; then + local/generate_chime6_data.sh \ + --cmd "$train_cmd" \ + ${chime5_corpus} \ + ${chime6_corpus} +fi + +########################################################################### +# We prepare dict and lang in stages 1 to 3. 
+########################################################################### + +if [ $stage -le 1 ]; then + # skip u03 and u04 as they are missing + for mictype in worn u01 u02 u05 u06; do + local/prepare_data.sh --mictype ${mictype} --train true \ + ${audio_dir}/train ${json_dir}/train data/train_${mictype} + done + for dataset in dev; do + for mictype in worn; do + local/prepare_data.sh --mictype ${mictype} --train true \ + ${audio_dir}/${dataset} ${json_dir}/${dataset} \ + data/${dataset}_${mictype} + done + done +fi + +if [ $stage -le 2 ]; then + local/prepare_dict.sh + + utils/prepare_lang.sh \ + data/local/dict "<unk>" data/local/lang data/lang + + local/train_lms_srilm.sh \ + --train-text data/train_worn/text --dev-text data/dev_worn/text \ + --oov-symbol "<unk>" --words-file data/lang/words.txt \ + data/ data/srilm +fi + +LM=data/srilm/best_3gram.gz +if [ $stage -le 3 ]; then + # Compiles G for chime5 trigram LM + utils/format_lm.sh \ + data/lang $LM data/local/dict/lexicon.txt data/lang + +fi + +if [ $stage -le 4 ]; then + # remove possibly bad sessions (P11_S03, P52_S19, P53_S24, P54_S24) + # see http://spandh.dcs.shef.ac.uk/chime_challenge/data.html for more details + utils/copy_data_dir.sh data/train_worn data/train_worn_org # back up + grep -v -e "^P11_S03" -e "^P52_S19" -e "^P53_S24" -e "^P54_S24" data/train_worn_org/text > data/train_worn/text + utils/fix_data_dir.sh data/train_worn +fi + + +######################################################################################### +# In stages 5 and 6, we augment and fix train data for our training purpose. point source +# noises are extracted from chime corpus. Here we use 400k utterances from array microphones, +# its augmentation and all the worn set utterances in train.
+######################################################################################### + +if [ $stage -le 5 ]; then + echo "$0: Extracting noise list from training data" + local/extract_noises.py $chime6_corpus/audio/train $chime6_corpus/transcriptions/train \ + local/distant_audio_list distant_noises + local/make_noise_list.py distant_noises > distant_noise_list + + noise_list=distant_noise_list + + echo "$0: Preparing simulated RIRs for data augmentation" + if [ ! -d RIRS_NOISES/ ]; then + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + + # This is the config for the system using simulated RIRs and point-source noises + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") + rvb_opts+=(--noise-set-parameters $noise_list) + + steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix "rev" \ + --foreground-snrs $foreground_snrs \ + --background-snrs $background_snrs \ + --speech-rvb-probability 1 \ + --pointsource-noise-addition-probability 1 \ + --isotropic-noise-addition-probability 1 \ + --num-replications $num_data_reps \ + --max-noises-per-minute 1 \ + --source-sampling-rate 16000 \ + data/train_worn data/train_worn_rvb +fi + +if [ $stage -le 6 ]; then + # combine mix array and worn mics + # randomly extract first 400k utterances from all mics + # if you want to include more training data, you can increase the number of array mic utterances + utils/combine_data.sh data/train_uall data/train_u01 data/train_u02 data/train_u05 data/train_u06 + utils/subset_data_dir.sh data/train_uall 400000 data/train_u400k + utils/combine_data.sh data/${train_set} data/train_worn data/train_worn_rvb data/train_u400k + utils/combine_data.sh data/${sad_train_set} 
data/train_worn data/train_u400k +fi + +if [ $stage -le 7 ]; then + # Split speakers up into 3-minute chunks. This doesn't hurt adaptation, and + # lets us use more jobs for decoding etc. + utils/copy_data_dir.sh data/${train_set} data/${train_set}_nosplit + utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${train_set}_nosplit data/${train_set} +fi + +################################################################################## +# Now make MFCC features. We use 13-dim MFCCs to train the GMM-HMM models. +################################################################################## + +if [ $stage -le 8 ]; then + # Now make MFCC features. + # mfccdir should be some place with a largish disk where you + # want to store MFCC features. + echo "$0: make features..." + mfccdir=mfcc + steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \ + --mfcc-config conf/mfcc.conf \ + data/${train_set} exp/make_mfcc/${train_set} $mfccdir + steps/compute_cmvn_stats.sh data/${train_set} exp/make_mfcc/${train_set} $mfccdir + utils/fix_data_dir.sh data/${train_set} +fi + +################################################################################### +# Stages 9 to 14 train monophone and triphone models. They will be used for +# generating lattices for training the chain model and for obtaining targets +# for training the SAD system. 
+################################################################################### + +if [ $stage -le 9 ]; then + # make a subset for monophone training + utils/subset_data_dir.sh --shortest data/${train_set} 100000 data/${train_set}_100kshort + utils/subset_data_dir.sh data/${train_set}_100kshort 30000 data/${train_set}_30kshort +fi + +if [ $stage -le 10 ]; then + # Starting basic training on MFCC features + steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set}_30kshort data/lang exp/mono +fi + +if [ $stage -le 11 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/mono exp/mono_ali + + steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/${train_set} data/lang exp/mono_ali exp/tri1 +fi + +if [ $stage -le 12 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/tri1 exp/tri1_ali + + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + 4000 50000 data/${train_set} data/lang exp/tri1_ali exp/tri2 +fi + +if [ $stage -le 13 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/tri2 exp/tri2_ali + + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 data/${train_set} data/lang exp/tri2_ali exp/tri3 +fi + +if [ $stage -le 14 ]; then + # The following script cleans the data and produces cleaned data + steps/cleanup/clean_and_segment_data.sh --nj $nj --cmd "$train_cmd" \ + --segmentation-opts "--min-segment-length 0.3 --min-new-segment-length 0.6" \ + data/${train_set} data/lang exp/tri3 exp/tri3_cleaned data/${train_set}_cleaned +fi + +########################################################################## +# CHAIN MODEL TRAINING +# You can also download a pretrained chain ASR model using: +# wget http://kaldi-asr.org/models/12/0012_asr_v1.tar.gz +# Once it is downloaded, extract using: tar -xvzf 0012_asr_v1.tar.gz +# and copy the contents of the exp/ directory to your exp/ 
+########################################################################## +if [ $stage -le 15 ]; then + # chain TDNN + local/chain/run_tdnn.sh --nj $nj \ + --stage $nnet_stage \ + --train-set ${train_set}_cleaned \ + --test-sets "$test_sets" \ + --gmm tri3_cleaned --nnet3-affix _${train_set}_cleaned_rvb +fi + +########################################################################## +# SAD MODEL TRAINING +# You can also download a pretrained SAD model using: +# wget http://kaldi-asr.org/models/12/0012_sad_v1.tar.gz +# Once it is downloaded, extract using: tar -xvzf 0012_sad_v1.tar.gz +# and copy the contents of the exp/ directory to your exp/ +########################################################################## +if [ $stage -le 16 ]; then + local/train_sad.sh --stage $sad_stage --nj $nj \ + --data-dir data/${sad_train_set} --test-sets "${test_sets}" \ + --sat-model-dir exp/tri3_cleaned \ + --model-dir exp/tri2 +fi + +########################################################################## +# DIARIZATION MODEL TRAINING +# You can also download a pretrained diarization model using: +# wget http://kaldi-asr.org/models/12/0012_diarization_v1.tar.gz +# Once it is downloaded, extract using: tar -xvzf 0012_diarization_v1.tar.gz +# and copy the contents of the exp/ directory to your exp/ +########################################################################## +if [ $stage -le 17 ]; then + local/train_diarizer.sh --stage $diarizer_stage \ + --data-dir data/${train_set} \ + --model-dir exp/xvector_nnet_1a +fi + +########################################################################## +# DECODING: In track 2, we are given raw utterances without segment +# or speaker information, so we have to decode the whole pipeline, i.e., +# SAD -> Diarization -> ASR. This is done in the local/decode.sh +# script. 
+########################################################################## +if [ $stage -le 18 ]; then + local/decode.sh --stage $decode_stage \ + --enhancement $enhancement \ + --test-sets "$test_sets" +fi + +exit 0; + diff --git a/egs/chime6/s5_track2/sid b/egs/chime6/s5_track2/sid new file mode 120000 index 00000000000..893a12f30c9 --- /dev/null +++ b/egs/chime6/s5_track2/sid @@ -0,0 +1 @@ +../../sre08/v1/sid \ No newline at end of file diff --git a/egs/chime6/s5_track2/steps b/egs/chime6/s5_track2/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/chime6/s5_track2/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/chime6/s5_track2/utils b/egs/chime6/s5_track2/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/chime6/s5_track2/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh index 20bcfd96d96..76025f4a388 100755 --- a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh +++ b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh @@ -46,6 +46,7 @@ overlap_duration=2.5 max_remaining_duration=5 # If the last remaining piece when splitting uniformly # is smaller than this duration, then the last piece # is merged with the previous. +remove_mismatch_frames=true # List of weights on labels obtained from alignment, # labels obtained from decoding and default labels in out-of-segment regions @@ -108,7 +109,7 @@ for f in $in_whole_data_dir/feats.scp $in_data_dir/segments \ fi done -utils/validate_data_dir.sh $in_data_dir || exit 1 +utils/validate_data_dir.sh --no-feats $in_data_dir || exit 1 utils/validate_data_dir.sh --no-text $in_whole_data_dir || exit 1 if ! 
cat $garbage_phones_list $silence_phones_list | \ @@ -159,7 +160,7 @@ whole_data_dir=$dir/$whole_data_id # Obtain supervision-constrained lattices ############################################################################### sup_lats_dir=$dir/`basename ${ali_model_dir}`_sup_lats_${data_id} -if [ $stage -le 2 ]; then +if [ $stage -le 3 ]; then steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" \ ${data_dir} ${lang} ${ali_model_dir} $sup_lats_dir || exit 1 fi @@ -170,7 +171,7 @@ fi uniform_seg_data_dir=$dir/${whole_data_id}_uniformseg_${max_segment_duration}sec uniform_seg_data_id=`basename $uniform_seg_data_dir` -if [ $stage -le 3 ]; then +if [ $stage -le 4 ]; then utils/data/get_segments_for_data.sh ${whole_data_dir} > \ ${whole_data_dir}/segments @@ -193,7 +194,7 @@ model_id=$(basename $model_dir) ############################################################################### if [ -z "$graph_dir" ]; then graph_dir=$dir/$model_id/graph - if [ $stage -le 4 ]; then + if [ $stage -le 5 ]; then if [ ! -f $graph_dir/HCLG.fst ]; then rm -r $dir/lang_test 2>/dev/null || true cp -r $lang_test/ $dir/lang_test @@ -207,7 +208,7 @@ fi ############################################################################### model_id=$(basename $model_dir) decode_dir=$dir/${model_id}/decode_${uniform_seg_data_id} -if [ $stage -le 5 ]; then +if [ $stage -le 6 ]; then mkdir -p $decode_dir cp $model_dir/{final.mdl,final.mat,*_opts,tree} $dir/${model_id} @@ -228,7 +229,7 @@ ali_model_id=`basename $ali_model_dir` # The target values are obtained by summing up posterior probabilites of # arcs from lattice-arc-post over silence, speech and garbage phones. 
############################################################################### -if [ $stage -le 6 ]; then +if [ $stage -le 7 ]; then steps/segmentation/lats_to_targets.sh --cmd "$train_cmd" \ --silence-phones "$silence_phones_list" \ --garbage-phones "$garbage_phones_list" \ @@ -237,7 +238,7 @@ if [ $stage -le 6 ]; then $dir/${ali_model_id}_${data_id}_sup_targets fi -if [ $stage -le 7 ]; then +if [ $stage -le 8 ]; then steps/segmentation/lats_to_targets.sh --cmd "$train_cmd" \ --silence-phones "$silence_phones_list" \ --garbage-phones "$garbage_phones_list" \ @@ -253,7 +254,7 @@ fi # for the manual segments, these are converted to whole recording-levels # by inserting [ 0 0 0 ] for the out-of-manual segment regions. ############################################################################### -if [ $stage -le 8 ]; then +if [ $stage -le 9 ]; then steps/segmentation/convert_targets_dir_to_whole_recording.sh --cmd "$train_cmd" --nj $reco_nj \ $data_dir $whole_data_dir \ $dir/${ali_model_id}_${data_id}_sup_targets \ @@ -268,7 +269,7 @@ fi ############################################################################### # Convert the targets from decoding to whole recording. ############################################################################### -if [ $stage -le 9 ]; then +if [ $stage -le 10 ]; then steps/segmentation/convert_targets_dir_to_whole_recording.sh --cmd "$train_cmd" --nj $reco_nj \ $dir/${uniform_seg_data_id} $whole_data_dir \ $dir/${model_id}_${uniform_seg_data_id}_targets \ @@ -285,7 +286,7 @@ fi # We assume in this setup that this is silence i.e. [ 1 0 0 ]. 
############################################################################### -if [ $stage -le 10 ]; then +if [ $stage -le 11 ]; then echo " [ 1 0 0 ]" > $dir/default_targets.vec steps/segmentation/get_targets_for_out_of_segments.sh --cmd "$train_cmd" \ --nj $reco_nj --frame-subsampling-factor 3 \ @@ -301,9 +302,9 @@ fi # disagree (more than 0.5 probability on different classes), then those frames # are removed by setting targets to [ 0 0 0 ]. ############################################################################### -if [ $stage -le 11 ]; then +if [ $stage -le 12 ]; then steps/segmentation/merge_targets_dirs.sh --cmd "$train_cmd" --nj $reco_nj \ - --weights $merge_weights --remove-mismatch-frames true \ + --weights $merge_weights --remove-mismatch-frames $remove_mismatch_frames \ $whole_data_dir \ $dir/${ali_model_id}_${whole_data_id}_sup_targets_sub3 \ $dir/${model_id}_${whole_data_id}_targets_sub3 \