From 13071f1ef2a2a6ec91e5adeb3edd2f05beb05c61 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Mon, 19 Mar 2018 22:56:15 -0700 Subject: [PATCH 01/26] initial setting --- egs/zeroth_korean/s5/RESULTS | 24 +++++++++++++++++++++ egs/zeroth_korean/s5/cmd.sh | 25 ++++++++++++++++++++++ egs/zeroth_korean/s5/conf/decode.config | 1 + egs/zeroth_korean/s5/conf/mfcc.conf | 1 + egs/zeroth_korean/s5/conf/mfcc_hires.conf | 10 +++++++++ egs/zeroth_korean/s5/conf/online_cmvn.conf | 1 + egs/zeroth_korean/s5/conf/queue.conf | 10 +++++++++ egs/zeroth_korean/s5/path.sh | 6 ++++++ egs/zeroth_korean/s5/steps | 1 + egs/zeroth_korean/s5/utils | 1 + 10 files changed, 80 insertions(+) create mode 100644 egs/zeroth_korean/s5/RESULTS create mode 100644 egs/zeroth_korean/s5/cmd.sh create mode 100644 egs/zeroth_korean/s5/conf/decode.config create mode 100644 egs/zeroth_korean/s5/conf/mfcc.conf create mode 100644 egs/zeroth_korean/s5/conf/mfcc_hires.conf create mode 100644 egs/zeroth_korean/s5/conf/online_cmvn.conf create mode 100644 egs/zeroth_korean/s5/conf/queue.conf create mode 100755 egs/zeroth_korean/s5/path.sh create mode 120000 egs/zeroth_korean/s5/steps create mode 120000 egs/zeroth_korean/s5/utils diff --git a/egs/zeroth_korean/s5/RESULTS b/egs/zeroth_korean/s5/RESULTS new file mode 100644 index 00000000000..8a189e3f501 --- /dev/null +++ b/egs/zeroth_korean/s5/RESULTS @@ -0,0 +1,24 @@ +# In the results below, "tgsmall" is the pruned 3-gram LM, which is used for lattice generation. 
+# The following language models are then used for rescoring: +# a) tglarge- the full, non-pruned 3-gram LM +# b) fglarge- non-pruned 4-gram LM +# The "test-clean" sets generally contain relatively cleaner Korean speech, +# the "test_200" are a subset of "test-clean", designed for quick evaluation + +### SAT GMM model trained on the "train-01" set (51 hours "clean" speech) +decode_fglarge_test_200/wer_14_0.5:%WER 21.17 [ 873 / 4124, 93 ins, 172 del, 608 sub ] +decode_tglarge_test_200/wer_15_0.0:%WER 21.46 [ 885 / 4124, 101 ins, 168 del, 616 sub ] +decode_tgsmall_test_200/wer_14_0.5:%WER 33.83 [ 1395 / 4124, 85 ins, 330 del, 980 sub ] +decode_tgsmall_test_200.si/wer_14_0.0:%WER 46.02 [ 1898 / 4124, 133 ins, 389 del, 1376 sub ] + +### Chain model trained on the "train-01" set +tdnn1n_online/decode_fglarge_test_200/wer_13_1.0:%WER 11.25 [ 464 / 4124, 65 ins, 78 del, 321 sub ] +tdnn1n_online/decode_tgsmall_test_200/wer_13_0.0:%WER 18.09 [ 746 / 4124, 89 ins, 123 del, 534 sub ] +tdnn_opgru_1c_sp_online/decode_fglarge_test_200/wer_8_1.0:%WER 9.00 [ 371 / 4124, 50 ins, 63 del, 258 sub ] +tdnn_opgru_1c_sp_online/decode_tgsmall_test_200/wer_8_0.5:%WER 14.06 [ 580 / 4124, 62 ins, 92 del, 426 sub ] + +### Chain model trained on the "train-01" set with multi-conditioned data augmentation +tdnn1n_rvb_online/decode_fglarge_test_200/wer_10_0.0:%WER 10.11 [ 417 / 4124, 73 ins, 57 del, 287 sub ] +tdnn1n_rvb_online/decode_tgsmall_test_200/wer_8_0.5:%WER 16.27 [ 671 / 4124, 87 ins, 91 del, 493 sub ] +tdnn_lstm_1e_rvb_online/decode_fglarge_test_200/wer_13_0.0:%WER 11.47 [ 473 / 4124, 74 ins, 61 del, 338 sub ] +tdnn_lstm_1e_rvb_online/decode_tgsmall_test_200/wer_12_1.0:%WER 16.97 [ 700 / 4124, 72 ins, 130 del, 498 sub ] diff --git a/egs/zeroth_korean/s5/cmd.sh b/egs/zeroth_korean/s5/cmd.sh new file mode 100644 index 00000000000..1687940f7d1 --- /dev/null +++ b/egs/zeroth_korean/s5/cmd.sh @@ -0,0 +1,25 @@ +# you can change cmd.sh depending on what type of queue you are using. 
+# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="run.pl --mem 2G" +export decode_cmd="run.pl --mem 4G" +export mkgraph_cmd="run.pl --mem 8G" +export normalize_cmd="run.pl --mem 4G" + +hostInAtlas="ares hephaestus jupiter neptune" +if [[ ! -z $(echo $hostInAtlas | grep -o $(hostname -f)) ]]; then + queue_conf=conf/queue.conf + export train_cmd="queue.pl --config $queue_conf --mem 4G" + export decode_cmd="queue.pl --config $queue_conf --mem 8G" + export mkgraph_cmd="queue.pl --config $queue_conf --mem 16G" + export normalize_cmd="queue.pl --config $queue_conf --mem 4G" +fi diff --git a/egs/zeroth_korean/s5/conf/decode.config b/egs/zeroth_korean/s5/conf/decode.config new file mode 100644 index 00000000000..7ba966f2b83 --- /dev/null +++ b/egs/zeroth_korean/s5/conf/decode.config @@ -0,0 +1 @@ +# empty config, just use the defaults. diff --git a/egs/zeroth_korean/s5/conf/mfcc.conf b/egs/zeroth_korean/s5/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/zeroth_korean/s5/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. 
diff --git a/egs/zeroth_korean/s5/conf/mfcc_hires.conf b/egs/zeroth_korean/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..434834a6725 --- /dev/null +++ b/egs/zeroth_korean/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. +--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/zeroth_korean/s5/conf/online_cmvn.conf b/egs/zeroth_korean/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/zeroth_korean/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/zeroth_korean/s5/conf/queue.conf b/egs/zeroth_korean/s5/conf/queue.conf new file mode 100644 index 00000000000..2aa9ee6a211 --- /dev/null +++ b/egs/zeroth_korean/s5/conf/queue.conf @@ -0,0 +1,10 @@ +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 +option gpu=* -l gpu=$0 diff --git a/egs/zeroth_korean/s5/path.sh b/egs/zeroth_korean/s5/path.sh new file mode 100755 index 00000000000..91c09618924 --- /dev/null +++ b/egs/zeroth_korean/s5/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. 
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=ko_KR.UTF-8 diff --git a/egs/zeroth_korean/s5/steps b/egs/zeroth_korean/s5/steps new file mode 120000 index 00000000000..6e99bf5b5ad --- /dev/null +++ b/egs/zeroth_korean/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/zeroth_korean/s5/utils b/egs/zeroth_korean/s5/utils new file mode 120000 index 00000000000..b240885218f --- /dev/null +++ b/egs/zeroth_korean/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file From d2856ba5614267bdb32c611aab97e3040b96a9fb Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Tue, 20 Mar 2018 11:30:09 -0700 Subject: [PATCH 02/26] main script --- .../chain/multi_condition/run_tdnn_1b.sh | 299 ++++++++++++++++ .../chain/multi_condition/run_tdnn_1n.sh | 302 ++++++++++++++++ .../chain/multi_condition/run_tdnn_lstm_1e.sh | 328 ++++++++++++++++++ .../s5/local/chain/run_tdnn_1a.sh | 266 ++++++++++++++ .../s5/local/chain/run_tdnn_1b.sh | 271 +++++++++++++++ .../s5/local/chain/run_tdnn_1n.sh | 275 +++++++++++++++ .../s5/local/chain/run_tdnn_lstm_1e.sh | 290 ++++++++++++++++ .../s5/local/chain/run_tdnn_opgru_1c.sh | 291 ++++++++++++++++ egs/zeroth_korean/s5/local/data_prep.sh | 104 ++++++ .../s5/local/download_and_untar.sh | 61 ++++ egs/zeroth_korean/s5/local/format_lms.sh | 63 ++++ .../s5/local/multi_condition/copy_ali_dir.sh | 78 +++++ .../s5/local/nnet2/run_5a_recData01.sh | 76 ++++ .../s5/local/nnet2/run_5a_train_2x.sh | 105 ++++++ .../s5/local/nnet2/run_5a_train_clean.sh | 77 ++++ .../multi_condition/run_ivector_common.sh | 214 ++++++++++++ .../s5/local/nnet3/run_ivector_common.sh | 124 +++++++ .../local/online/export_online_nnet2_model.sh | 33 ++ 
.../s5/local/online/run_nnet2_common.sh | 101 ++++++ .../s5/local/online/run_nnet2_ms.sh | 267 ++++++++++++++ .../s5/local/online/run_nnet2_ms_disc.sh | 164 +++++++++ egs/zeroth_korean/s5/local/prepare_dict.sh | 65 ++++ egs/zeroth_korean/s5/local/score.sh | 63 ++++ .../s5/local/updateSegmentation.sh | 51 +++ egs/zeroth_korean/s5/run.sh | 194 +++++++++++ 25 files changed, 4162 insertions(+) create mode 100755 egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh create mode 100755 egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1n.sh create mode 100755 egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_lstm_1e.sh create mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_1a.sh create mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_1b.sh create mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_1n.sh create mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_lstm_1e.sh create mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_opgru_1c.sh create mode 100755 egs/zeroth_korean/s5/local/data_prep.sh create mode 100755 egs/zeroth_korean/s5/local/download_and_untar.sh create mode 100755 egs/zeroth_korean/s5/local/format_lms.sh create mode 100755 egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh create mode 100755 egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh create mode 100755 egs/zeroth_korean/s5/local/nnet2/run_5a_train_2x.sh create mode 100755 egs/zeroth_korean/s5/local/nnet2/run_5a_train_clean.sh create mode 100755 egs/zeroth_korean/s5/local/nnet3/multi_condition/run_ivector_common.sh create mode 100755 egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh create mode 100755 egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh create mode 100755 egs/zeroth_korean/s5/local/online/run_nnet2_common.sh create mode 100755 egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh create mode 100755 egs/zeroth_korean/s5/local/online/run_nnet2_ms_disc.sh create mode 100755 egs/zeroth_korean/s5/local/prepare_dict.sh 
create mode 100755 egs/zeroth_korean/s5/local/score.sh create mode 100755 egs/zeroth_korean/s5/local/updateSegmentation.sh create mode 100755 egs/zeroth_korean/s5/run.sh diff --git a/egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh b/egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh new file mode 100755 index 00000000000..c8ebaeb2e05 --- /dev/null +++ b/egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh @@ -0,0 +1,299 @@ +#!/bin/bash + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train_clean +num_data_reps=1 # number of reverberated copies of data to generate +speed_perturb=true +test_sets="test_200" +gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +nnet3_affix=_rvb # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1b_rvb #affix for TDNN directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= + +# LSTM/chain options +train_stage=-10 +xent_regularize=0.1 +max_param_change=2.0 + +# training chunk-options +chunk_width=150 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 + +# training options +srand=0 +num_jobs_initial=2 +num_jobs_final=12 +num_epochs=4 +minibatch_size=128 +initial_effective_lrate=0.001 +final_effective_lrate=0.0001 +remove_egs=true + + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $clean_lat_dir + rm $clean_lat_dir/fsts.*.gz # save space + # Create the lattices for the reverberated data + + # We use the lattices/alignments from the clean data for the reverberated data. + mkdir -p $lat_dir/temp/ + lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp + + # copy the lattices for the reverberated data + rm -f $lat_dir/temp/combined_lats.scp + touch $lat_dir/temp/combined_lats.scp + # Here prefix "rev0_" represents the clean set, "rev1_" represents the reverberated set + for i in `seq 0 $num_data_reps`; do + cat $lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp + done + sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp + + lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; + echo "1" > $lat_dir/num_jobs + + # copy other files from original lattice dir + for f in cmvn_opts final.mdl splice_opts tree; do + cp $clean_lat_dir/$f $lat_dir/$f + done + +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand $srand \ + --trainer.max-param-change $max_param_change \ + --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 1500000 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + 
--use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3_rvb/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $clean_lat_dir + rm $clean_lat_dir/fsts.*.gz # save space + # Create the lattices for the reverberated data + + # We use the lattices/alignments from the clean data for the reverberated data. 
+ mkdir -p $lat_dir/temp/ + lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp + + # copy the lattices for the reverberated data + rm -f $lat_dir/temp/combined_lats.scp + touch $lat_dir/temp/combined_lats.scp + # Here prefix "rev0_" represents the clean set, "rev1_" represents the reverberated set + for i in `seq 0 $num_data_reps`; do + cat $lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp + done + sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp + + lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; + echo "1" > $lat_dir/num_jobs + + # copy other files from original lattice dir + for f in cmvn_opts final.mdl splice_opts tree; do + cp $clean_lat_dir/$f $lat_dir/$f + done + +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + opts="l2-regularize=0.002" + linear_opts="orthonormal-constraint=1.0" + output_opts="l2-regularize=0.0005 bottleneck-dim=256" + + mkdir -p $dir/configs + + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=1280 + linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn3l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn3 $opts dim=1280 + linear-component name=tdnn4l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn5l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) + linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn7l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 + 
linear-component name=tdnn8l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 + linear-component name=tdnn10l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn8l,tdnn6l) dim=1280 + linear-component name=prefinal-l dim=256 $linear_opts + + relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.max-param-change $max_param_change \ + --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 1500000 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --egs.stage $get_egs_stage \ + --egs.chunk-width $chunk_width \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + --use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
+ + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3_rvb/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $clean_lat_dir + rm $clean_lat_dir/fsts.*.gz # save space + # Create the lattices for the reverberated data + + # We use the lattices/alignments from the clean data for the reverberated data. 
+ mkdir -p $lat_dir/temp/ + lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp + + # copy the lattices for the reverberated data + rm -f $lat_dir/temp/combined_lats.scp + touch $lat_dir/temp/combined_lats.scp + # Here prefix "rev0_" represents the clean set, "rev1_" represents the reverberated set + for i in `seq 0 $num_data_reps`; do + cat $lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp + done + sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp + + lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; + echo "1" > $lat_dir/num_jobs + + # copy other files from original lattice dir + for f in cmvn_opts final.mdl splice_opts tree; do + cp $clean_lat_dir/$f $lat_dir/$f + done + +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for 
chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + + +if [ $stage -le 12 ]; then + + hostInAtlas="ares hephaestus jupiter neptune" + if [[ ! -z $(echo $hostInAtlas | grep -o $(hostname -f)) ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl /mnt/{ares,hephaestus,jupiter,neptune}/$USER/kaldi-data/zeroth-kaldi-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage \ + $dir/egs/storage + fi + #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + # utils/create_split_dir.pl \ + # /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + #fi + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change $max_param_change \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). 
So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3_rvb/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand $srand \ + --trainer.max-param-change $max_param_change \ + --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 1500000 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + 
--use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand $srand \ + --trainer.max-param-change $max_param_change \ + --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 1500000 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + 
--use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + opts="l2-regularize=0.002" + linear_opts="orthonormal-constraint=1.0" + output_opts="l2-regularize=0.0005 bottleneck-dim=256" + + mkdir -p $dir/configs + + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=1280 + linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn3l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn3 $opts dim=1280 + linear-component name=tdnn4l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn5l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) + linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn7l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 + 
linear-component name=tdnn8l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 + linear-component name=tdnn10l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn8l,tdnn6l) dim=1280 + linear-component name=prefinal-l dim=256 $linear_opts + + relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.max-param-change $max_param_change \ + --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 1500000 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --egs.stage $get_egs_stage \ + --egs.chunk-width $chunk_width \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + --use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
+ + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for 
chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change $max_param_change \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). 
So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 9 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + gru_opts="dropout-per-frame=true dropout-proportion=0.0" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=1024 + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/gru.py for the other options and defaults + norm-opgru-layer name=opgru1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $gru_opts + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + norm-opgru-layer name=opgru2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $gru_opts + relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + norm-opgru-layer name=opgru3 cell-dim=1024 
recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $gru_opts + + ## adding the layers for chain branch + output-layer name=output input=opgru3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=opgru3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.backstitch-training-scale 0.3 \ + --trainer.optimization.backstitch-training-interval 1 \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir + +fi + +if [ $stage -le 13 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). 
So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgsmall/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if $test_online_decoding && [ $stage -le 14 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l " + echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean" + exit 1 +fi + +src=$1 +dst=$2 + +# all utterances are FLAC compressed +if ! which flac >&/dev/null; then + echo "Please install 'flac' on ALL worker nodes!" + exit 1 +fi + +spk_file=$src/../AUDIO_INFO + +mkdir -p $dst || exit 1; + +[ ! -d $src ] && echo "$0: no such directory $src" && exit 1; +[ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1; + +wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp +trans=$dst/text; [[ -f "$trans" ]] && rm $trans +utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk +spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender +utt2dur=$dst/utt2dur; [[ -f "$utt2dur" ]] && rm $utt2dur + +for scriptid_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do + scriptid=$(basename $scriptid_dir) + if ! [ $scriptid -eq $scriptid ]; then # not integer. + echo "$0: unexpected subdirectory name $scriptid" + exit 1; + fi + + for reader_dir in $(find -L $scriptid_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do + reader=$(basename $reader_dir) + if ! 
[ "$reader" -eq "$reader" ]; then + echo "$0: unexpected reader-subdirectory name $reader" + exit 1; + fi + + reader_gender=$(egrep "^$reader\|" $spk_file | awk -F'|' '{gsub(/[ ]+/, ""); print tolower($3)}') + if [ "$reader_gender" != 'm' ] && [ "$reader_gender" != 'f' ]; then + echo "Unexpected gender: '$reader_gender'" + exit 1; + fi + + echo " "$scriptid $reader $reader_gender + + find -L $reader_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ + awk -v "dir=$reader_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 + + reader_trans=$reader_dir/${reader}_${scriptid}.trans.txt + [ ! -f $reader_trans ] && echo "$0: expected file $reader_trans to exist" && exit 1 + cat $reader_trans >>$trans + + # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered + # to be a different speaker. This is done for simplicity and because we want + # e.g. the CMVN to be calculated per-chapter + awk -v "reader=$reader" -v "scriptid=$scriptid" '{printf "%s %s_%s\n", $1, reader, scriptid}' \ + <$reader_trans >>$utt2spk || exit 1 + + # reader -> gender map (again using per-chapter granularity) + echo "${reader}_${scriptid} $reader_gender" >>$spk2gender + + done +done + +# sort +cat $wav_scp | sort > tmp +cp tmp $wav_scp +cat $trans | sort > tmp +cp tmp $trans +cat $utt2spk | sort > tmp +cp tmp $utt2spk +cat $spk2gender | sort > tmp +cp tmp $spk2gender +rm tmp + + +spk2utt=$dst/spk2utt +utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 + +ntrans=$(wc -l <$trans) +nutt2spk=$(wc -l <$utt2spk) +! 
[ "$ntrans" -eq "$nutt2spk" ] && \ + echo "Inconsistent #transcripts($ntrans) and #utt2spk($nutt2spk)" && exit 1; + +utils/data/get_utt2dur.sh $dst 1>&2 || exit 1 + +utils/validate_data_dir.sh --no-feats $dst || exit 1; + +echo "$0: successfully prepared data in $dst" + +exit 0 diff --git a/egs/zeroth_korean/s5/local/download_and_untar.sh b/egs/zeroth_korean/s5/local/download_and_untar.sh new file mode 100755 index 00000000000..0b56bcb37b3 --- /dev/null +++ b/egs/zeroth_korean/s5/local/download_and_untar.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +# Copyright 2018 Lucas Jo (Atlas Guide) +# 2018 Wonkyum Lee (Gridspace) +# Apache 2.0 + +if [ $# -ne "1" ]; then + echo "Usage: $0 " + echo "e.g.: $0 ./db" + exit 1 +fi + +exists(){ + command -v "$1" >/dev/null 2>&1 +} + + +dir=$1 +local_lm_dir=data/local/lm + +AUDIOINFO='AUDIO_INFO' +AUDIOLIST='train_data_01 test_data_01' + +echo "Now download corpus ----------------------------------------------------" +if [ ! -f $dir/db.tar.gz ]; then + if [ ! -d $dir ]; then + mkdir -p $dir + fi + wget -O $dir/db.tar.gz https://storage.googleapis.com/zeroth_project/zeroth_korean.tar.gz +else + echo " $dir/db.tar.gz already exist" +fi + +echo "Now extract corpus ----------------------------------------------------" +if [ ! -f $dir/$AUDIOINFO ]; then + tar -zxvf $dir/db.tar.gz -C $dir + else + echo " corpus already extracted" +fi + +if [ ! 
-d $local_lm_dir ]; then + mkdir -p $local_lm_dir +fi +echo "Check LMs files" +LMList="\ + zeroth.lm.fg.arpa.gz \ + zeroth.lm.tg.arpa.gz \ + zeroth.lm.tgmed.arpa.gz \ + zeroth.lm.tgsmall.arpa.gz \ + zeroth_lexicon \ + zeroth_morfessor.seg" + +for file in $LMList; do + if [ -f $local_lm_dir/$file ]; then + echo $file already exist + else + echo "Linking "$file + ln -s $PWD/$dir/$file $local_lm_dir/$file + fi +done +echo "all the files (lexicon, LM, segment model) are ready" diff --git a/egs/zeroth_korean/s5/local/format_lms.sh b/egs/zeroth_korean/s5/local/format_lms.sh new file mode 100755 index 00000000000..5947ae6b620 --- /dev/null +++ b/egs/zeroth_korean/s5/local/format_lms.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# Copyright 2014 Vassil Panayotov +# Apache 2.0 + +# Prepares the test time language model(G) transducers +# (adapted from wsj/s5/local/wsj_format_data.sh) + +# Modified by Lucas Jo 2017 (Altas Guide) + +. ./path.sh || exit 1; + +# begin configuration section +src_dir=data/lang +# end configuration section + +. utils/parse_options.sh || exit 1; + +set -e + +if [ $# -ne 1 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /export/a15/vpanayotov/data/lm" + echo ", where:" + echo " is the directory in which the language model is stored/downloaded" + echo "Options:" + echo " --src-dir # source lang directory, default data/lang" + exit 1 +fi + +lm_dir=$1 + +if [ ! -d $lm_dir ]; then + echo "$0: expected source LM directory $lm_dir to exist" + exit 1; +fi +if [ ! -f $src_dir/words.txt ]; then + echo "$0: expected $src_dir/words.txt to exist." + exit 1; +fi + + +tmpdir=data/local/lm_tmp.$$ +trap "rm -r $tmpdir" EXIT + +mkdir -p $tmpdir + +for lm_suffix in tgsmall tgmed; do + # tglarge is prepared by a separate command, called from run.sh; we don't + # want to compile G.fst for tglarge, as it takes a while. 
+ test=${src_dir}_test_${lm_suffix} + mkdir -p $test + cp -r ${src_dir}/* $test + gunzip -c $lm_dir/zeroth.lm.${lm_suffix}.arpa.gz | \ + arpa2fst --disambig-symbol=#0 \ + --read-symbol-table=$test/words.txt - $test/G.fst + + utils/validate_lang.pl --skip-determinization-check $test || exit 1; +done + +echo "Succeeded in formatting data." + +exit 0 diff --git a/egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh b/egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh new file mode 100755 index 00000000000..42ea2dc4b9d --- /dev/null +++ b/egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# Copyright 2014 Johns Hopkins University (author: Vijayaditya Peddinti) +# Apache 2.0 + +# This script operates on a directory, such as in exp/tri4a_ali, +# that contains some subset of the following files: +# ali.*.gz +# tree +# cmvn_opts +# splice_opts +# num_jobs +# final.mdl +# It copies to another directory, possibly adding a specified prefix or a suffix +# to the utterance names. + + +# begin configuration section +utt_prefix= +utt_suffix= +cmd=run.pl +# end configuration section + +. utils/parse_options.sh + +if [ $# != 2 ]; then + echo "Usage: " + echo " $0 [options] " + echo "e.g.:" + echo " $0 --utt-prefix=1- exp/tri4a_ali exp/tri4a_rev1_ali" + echo "Options" + echo " --utt-prefix= # Prefix for utterance ids, default empty" + echo " --utt-suffix= # Suffix for utterance ids, default empty" + exit 1; +fi + + +export LC_ALL=C + +src_dir=$1 +dest_dir=$2 + +mkdir -p $dest_dir + +if [ ! -f $src_dir/ali.1.gz ]; then + echo "copy_ali_dir.sh: no such files $src_dir/ali.*.gz" + exit 1; +fi + +for f in tree cmvn_opts splice_opts num_jobs final.mdl; do + if [ ! -f $src_dir/$f ]; then + echo "copy_ali_dir.sh: no such file $src_dir/$f this might be serious error." 
+ continue + fi + cp $src_dir/$f $dest_dir/ +done + +nj=$(cat $dest_dir/num_jobs) +mkdir -p $dest_dir/temp +cat << EOF > $dest_dir/temp/copy_ali.sh +set -e; +id=\$1 +echo "$src_dir/ali.\$id.gz" +gunzip -c $src_dir/ali.\$id.gz | \ + copy-int-vector ark:- ark,t:- | \ +python -c " +import sys +for line in sys.stdin: + parts = line.split() + print '$utt_prefix{0}$utt_suffix {1}'.format(parts[0], ' '.join(parts[1:])) +" | \ + gzip -c >$dest_dir/ali.\$id.gz || exit 1; +set +o pipefail; # unset the pipefail option. +EOF +chmod +x $dest_dir/temp/copy_ali.sh +$cmd -v PATH JOB=1:$nj $dest_dir/temp/copy_ali.JOB.log $dest_dir/temp/copy_ali.sh JOB || exit 1; + +echo "$0: copied alignments from $src_dir to $dest_dir" diff --git a/egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh b/egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh new file mode 100755 index 00000000000..c7e563906c6 --- /dev/null +++ b/egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +# This is p-norm neural net training, with the "fast" script, on top of adapted +# 40-dimensional features. + +# Modified by Lucas Jo 2017 (Altas Guide) + + +train_stage=-10 +use_gpu=true + +. cmd.sh +. ./path.sh +. utils/parse_options.sh + + +if $use_gpu; then + if ! 
cuda-compiled; then + cat < $from/reco2dur + + if [ -f $to/utt2dur ] ; then + rm $to/uttdur + fi + for i in `seq 0 ${num_data_reps}`; do + cat $from/reco2dur | sed -e "s/^/rev${i}_/" >> $to/utt2dur + done + ### + + + for datadir in ${trainset}_rvb${num_data_reps} ; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + done + + # copy the alignments for the newly created utterance ids + ali_dirs= + for i in `seq 0 $num_data_reps`; do + local/multi_condition/copy_ali_dir.sh --cmd "$decode_cmd" --utt-prefix "rev${i}_" ${gmmdir}_ali_${trainset} ${gmmdir}_ali_${trainset}_temp_$i || exit 1; + ali_dirs+=" ${gmmdir}_ali_${trainset}_temp_$i" + done + steps/combine_ali_dirs.sh data/${trainset}_rvb${num_data_reps} ${gmmdir}_ali_${trainset}_rvb $ali_dirs || exit 1; + + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We align a subset of training data for + # this purpose. + utils/subset_data_dir.sh data/${trainset}_rvb${num_data_reps}_hires 100000 data/train_100k_hires + utils/subset_data_dir.sh data/${trainset}_rvb${num_data_reps}_hires 30000 data/train_30k_hires +fi + + +if [ $stage -le 4 ]; then + # Train a small system just for its LDA+MLLT transform. We use --num-iters 13 + # because after we get the transform (12th iter is the last), any further + # training is pointless. 
+ + mkdir exp -p exp/nnet3${rvb_affix} + + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --realign-iters "" \ + --splice-opts "--left-context=3 --right-context=3" \ + 3000 10000 data/train_100k_hires data/lang_nosp \ + ${gmmdir}_ali_${trainset}_rvb exp/nnet3${rvb_affix}/tri2b +fi + + +if [ $stage -le 5 ]; then + # To train a diagonal UBM we don't need very much data, so use a small subset + # (actually, it's not that small: still around 100 hours). + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj $nj --num_threads $maxThread --num-frames 700000 \ + data/train_30k_hires 512 exp/nnet3${rvb_affix}/tri2b exp/nnet3${rvb_affix}/diag_ubm +fi + +if [ $stage -le 6 ]; then + # iVector extractors can in general be sensitive to the amount of data, but + # this one has a fairly small dim (defaults to 100) so we don't use all of it, + # we use just the 3k subset (about one fifth of the data, or 200 hours). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${trainset}_rvb${num_data_reps}_hires exp/nnet3${rvb_affix}/diag_ubm exp/nnet3${rvb_affix}/extractor || exit 1; +fi + +if [ $stage -le 7 ]; then + ivectordir=exp/nnet3${rvb_affix}/ivectors_${trainset}_rvb${num_data_reps}_hires + + # We extract iVectors on all the train data, which will be what we train the + # system on. With --utts-per-spk-max 2, the script. pairs the utterances + # into twos, and treats each of these pairs as one speaker. Note that these + # are extracted 'online'. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). 
+ utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ + data/${trainset}_rvb${num_data_reps}_hires data/${trainset}_rvb${num_data_reps}_hires_max2 + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ + data/${trainset}_rvb${num_data_reps}_hires_max2 exp/nnet3${rvb_affix}/extractor $ivectordir || exit 1; +fi + + +exit 0; diff --git a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..116070ab50b --- /dev/null +++ b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +# this script contains some common (shared) parts of the run_nnet*.sh scripts. +. cmd.sh + + +stage=0 +gmmdir=exp/tri4b +speed_perturb=false +trainset=train_clean + +set -e +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if [ "$speed_perturb" == "true" ]; then + if [ $stage -le 1 ]; then + #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment + # _sp stands for speed-perturbed + + for datadir in ${trainset} ; do + utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1 + utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2 + utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2 + utils/validate_data_dir.sh --no-feats data/${datadir}_tmp + rm -r data/temp1 data/temp2 + + mfccdir=mfcc_perturbed + steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 \ + data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1; + utils/fix_data_dir.sh data/${datadir}_tmp + + utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0 + utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0 + utils/fix_data_dir.sh data/${datadir}_sp + rm -r data/temp0 data/${datadir}_tmp + done + fi + + if [ $stage -le 2 ]; 
then + #obtain the alignment of the perturbed data + steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ + data/${trainset}_sp data/lang_nosp ${gmmdir} ${gmmdir}_ali_${trainset}_sp || exit 1 + fi + trainset=${trainset}_sp +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=mfcc_hires + #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + # utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + #fi + + for datadir in ${trainset} ; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + done + + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We align a subset of training data for + # this purpose. + utils/subset_data_dir.sh data/${trainset}_hires 30000 data/train_30k_hires +fi + + +if [ $stage -le 4 ]; then + # Train a small system just for its LDA+MLLT transform. We use --num-iters 13 + # because after we get the transform (12th iter is the last), any further + # training is pointless. 
+ + mkdir exp -p exp/nnet3 + + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --realign-iters "" \ + --splice-opts "--left-context=3 --right-context=3" \ + 3000 10000 data/${trainset}_hires data/lang_nosp \ + ${gmmdir}_ali_${trainset} exp/nnet3/tri2b +fi + + +if [ $stage -le 5 ]; then + # To train a diagonal UBM we don't need very much data, so use a small subset + # (actually, it's not that small: still around 100 hours). + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 700000 \ + data/train_30k_hires 512 exp/nnet3/tri2b exp/nnet3/diag_ubm +fi + +if [ $stage -le 6 ]; then + # iVector extractors can in general be sensitive to the amount of data, but + # this one has a fairly small dim (defaults to 100) so we don't use all of it, + # we use just the 3k subset (about one fifth of the data, or 200 hours). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${trainset}_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1; +fi + +if [ $stage -le 7 ]; then + ivectordir=exp/nnet3/ivectors_${trainset}_hires + + # We extract iVectors on all the train data, which will be what we train the + # system on. With --utts-per-spk-max 2, the script. pairs the utterances + # into twos, and treats each of these pairs as one speaker. Note that these + # are extracted 'online'. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). 
+ utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ + data/${trainset}_hires data/${trainset}_hires_max2 + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 60 \ + data/${trainset}_hires_max2 exp/nnet3/extractor $ivectordir || exit 1; +fi + + +exit 0; diff --git a/egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh b/egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh new file mode 100755 index 00000000000..a9b4a61c6d2 --- /dev/null +++ b/egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Copyright 2017 Lucas Jo (Atlas Guide) +# Apache 2.0 + +if [ $# -ne "1" ]; then + echo "Usage: $0 " + echo "e.g.: $0 ./export" + exit 1 +fi + +tardir=$1 +srcdir=exp/nnet2_online/nnet_ms_a_online +graphdir=exp/tri5b/graph_tgsmall +oldlang=data/lang_test_tgsmall +newlang=data/lang_test_fglarge +oldlm=$oldlang/G.fst +newlm=$newlang/G.carpa +symtab=$newlang/words.txt + +for f in $srcdir/final.mdl $symtab $graphdir/HCLG.fst $srcdir/conf/mfcc.conf \ + $srcdir/conf/ivector_extractor.conf $oldlm $newlm; do + [ ! -f $f ] && echo "export_model.sh: no such file $f" && exit 1; +done + +mkdir -p $tardir/conf +cp -rpf $srcdir/final.mdl $tardir/final.mdl # acoustic model +cp -rpf $symtab $tardir/words.txt # word symbol table +cp -rpf $graphdir/HCLG.fst $tardir/HCLG.fst # HCLG +cp -rpf $srcdir/conf/mfcc.conf $tardir/conf/mfcc.conf +cp -rpf $srcdir/conf/ivector_extractor.conf $tardir/conf/ivector_extractor.conf +cp -rpf $oldlm $tardir/G.fst +cp -rpf $newlm $tardir/G.carpa diff --git a/egs/zeroth_korean/s5/local/online/run_nnet2_common.sh b/egs/zeroth_korean/s5/local/online/run_nnet2_common.sh new file mode 100755 index 00000000000..d1ac0a2f5d2 --- /dev/null +++ b/egs/zeroth_korean/s5/local/online/run_nnet2_common.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +# this script contains some common (shared) parts of the run_nnet*.sh scripts. +# Modified by Lucas Jo 2017 (Altas Guide) +. 
cmd.sh + + +stage=0 + +set -e +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if [ $stage -le 1 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=mfcc + #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + # utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + #fi + + for datadir in train_2x; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + done + + # now create some data subsets. + # mixed is the clean+other data. + # a is 1/5 of the data, b is 2/5th of it. + utils/subset_data_dir.sh data/train_2x_hires 3000 data/train_mixed_hires_a + utils/subset_data_dir.sh data/train_2x_hires 6000 data/train_mixed_hires_b +fi + +if [ $stage -le 2 ]; then + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We align a subset of training data for + # this purpose. + utils/subset_data_dir.sh --utt-list <(awk '{print $1}' data/train_mixed_hires_a/utt2spk) \ + data/train_2x data/train_2x_a + + steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \ + data/train_2x_a data/lang exp/tri5b exp/nnet2_online/tri5b_ali_a +fi + +if [ $stage -le 3 ]; then + # Train a small system just for its LDA+MLLT transform. We use --num-iters 13 + # because after we get the transform (12th iter is the last), any further + # training is pointless. 
+ #5000 10000 data/train_mixed_hires_a data/lang \ + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --realign-iters "" \ + --splice-opts "--left-context=3 --right-context=3" \ + 3000 20000 data/train_mixed_hires_a data/lang \ + exp/nnet2_online/tri5b_ali_a exp/nnet2_online/tri6b +fi + + +if [ $stage -le 4 ]; then + mkdir -p exp/nnet2_online + # To train a diagonal UBM we don't need very much data, so use a small subset + # (actually, it's not that small: still around 100 hours). + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 400000 \ + data/train_mixed_hires_a 256 exp/nnet2_online/tri6b exp/nnet2_online/diag_ubm +fi + +if [ $stage -le 5 ]; then + # iVector extractors can in general be sensitive to the amount of data, but + # this one has a fairly small dim (defaults to 100) so we don't use all of it, + # we use just the 3k subset (about one fifth of the data, or 200 hours). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/train_mixed_hires_b exp/nnet2_online/diag_ubm exp/nnet2_online/extractor || exit 1; +fi + +if [ $stage -le 6 ]; then + ivectordir=exp/nnet2_online/ivectors_train_2x_hires + #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then + # utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage + #fi + + # We extract iVectors on all the train data, which will be what we train the + # system on. With --utts-per-spk-max 2, the script. pairs the utterances + # into twos, and treats each of these pairs as one speaker. Note that these + # are extracted 'online'. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). 
+ steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_2x_hires data/train_2x_hires_max2 + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 60 \ + data/train_2x_hires_max2 exp/nnet2_online/extractor $ivectordir || exit 1; +fi + + +exit 0; diff --git a/egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh b/egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh new file mode 100755 index 00000000000..d46e2f63667 --- /dev/null +++ b/egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh @@ -0,0 +1,267 @@ +#!/bin/bash + +# This is the "multi-splice" version of the online-nnet2 training script. +# It's currently the best recipe. +# You'll notice that we splice over successively larger windows as we go deeper +# into the network. + +# Modified by Lucas Jo 2017 (Altas Guide) + +. cmd.sh + + +stage=0 +train_stage=-10 +use_gpu=true +dir=exp/nnet2_online/nnet_ms_a +exit_train_stage=-100 + +set -e +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if $use_gpu; then + if ! cuda-compiled; then + cat < " + echo "e.g.: /data/local/lm data/local/dict_nosp" + exit 1 +fi +lm_dir=$1 +dst_dir=$2 + +mkdir -p $dst_dir || exit 1; + +# this file is a copy of the lexicon we obtained from download_lm.sh process +lexicon_raw_nosil=$dst_dir/lexicon_raw_nosil.txt + +if [[ ! -s "$lexicon_raw_nosil" ]]; then + cp $lm_dir/zeroth_lexicon $lexicon_raw_nosil || exit 1 +fi + +silence_phones=$dst_dir/silence_phones.txt +optional_silence=$dst_dir/optional_silence.txt +nonsil_phones=$dst_dir/nonsilence_phones.txt +extra_questions=$dst_dir/extra_questions.txt + +echo "Preparing phone lists and clustering questions" +(echo SIL; echo SPN;) > $silence_phones +#( echo SIL; echo BRH; echo CGH; echo NSN ; echo SMK; echo UM; echo UHH ) > $silence_phones +echo SIL > $optional_silence +# nonsilence phones; on each line is a list of phones that correspond +# really to the same base phone. 
+awk '{for (i=2; i<=NF; ++i) { print $i; gsub(/[0-9]/, "", $i); print $i}}' $lexicon_raw_nosil |\ + sort -u |\ + perl -e 'while(<>){ +chop; m:^([^\d]+)(\d*)$: || die "Bad phone $_"; +$phones_of{$1} .= "$_ "; } +foreach $list (values %phones_of) {print $list . "\n"; } ' \ + > $nonsil_phones || exit 1; +# A few extra questions that will be added to those obtained by +# automatically clustering +# the "real" phones. These ask about stress; there's also one for +# silence. +cat $silence_phones| awk '{printf("%s ", $1);} END{printf "\n";}' > $extra_questions || exit 1; +cat $nonsil_phones | perl -e 'while(<>){ foreach $p (split(" ", $_)){ + $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \ + >> $extra_questions || exit 1; + +echo "$(wc -l <$silence_phones) silence phones saved to: $silence_phones" +echo "$(wc -l <$optional_silence) optional silence saved to: $optional_silence" +echo "$(wc -l <$nonsil_phones) non-silence phones saved to: $nonsil_phones" +echo "$(wc -l <$extra_questions) extra triphone clustering-related questions saved to: $extra_questions" + +#(echo '!SIL SIL'; echo '[BREATH] BRH'; echo '[NOISE] NSN'; echo '[COUGH] CGH'; +# echo '[SMACK] SMK'; echo '[UM] UM'; echo '[UH] UHH' +# echo ' NSN' ) | \ +(echo '!SIL SIL'; echo ' SPN'; echo ' SPN'; ) |\ +cat - $lexicon_raw_nosil | sort | uniq >$dst_dir/lexicon.txt +echo "Lexicon text file saved as: $dst_dir/lexicon.txt" +exit 0 + diff --git a/egs/zeroth_korean/s5/local/score.sh b/egs/zeroth_korean/s5/local/score.sh new file mode 100755 index 00000000000..c812199fc98 --- /dev/null +++ b/egs/zeroth_korean/s5/local/score.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# 2014 Guoguo Chen +# Apache 2.0 + +[ -f ./path.sh ] && . ./path.sh + +# begin configuration section. +cmd=run.pl +stage=0 +decode_mbr=true +word_ins_penalty=0.0,0.5,1.0 +min_lmwt=7 +max_lmwt=17 +iter=final +#end configuration section. 
+ +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; +done + +# Note: the double level of quoting for the sed command +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ + cat $dir/scoring/LMWT.$wip.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; +done + +exit 0; diff --git a/egs/zeroth_korean/s5/local/updateSegmentation.sh b/egs/zeroth_korean/s5/local/updateSegmentation.sh new file mode 100755 index 00000000000..e892f902837 --- /dev/null +++ b/egs/zeroth_korean/s5/local/updateSegmentation.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright 2017 Lucas Jo (Atlas Guide) +# Apache 2.0 + +# do this when the segmentation 
rule is changed +dataDir=$1 +lmDir=$2 + +exists(){ + command -v "$1" >/dev/null 2>&1 +} + +# check morfessor installation +if ! exists morfessor; then + echo "Please, install Morfessor" + exit 1 +fi + +trans=$dataDir/text +echo "Re-segment transcripts: $trans --------------------------------------------" +if [ ! -f $trans ]; then + echo "transcription file is not found in "$dataDir + exit 1 +fi +cp $trans $trans".old" +awk '{print $1}' $trans".old" > $trans"_tmp_index" +cut -d' ' -f2- $trans".old" |\ + sed -E 's/\s+/ /g; s/^\s//g; s/\s$//g' |\ + morfessor -l $lmDir/zeroth_morfessor.seg -T - -o - \ + --output-format '{analysis} ' --output-newlines \ + --nosplit-re '[0-9\[\]\(\){}a-zA-Z&.,\-]+' \ + | paste -d" " $trans"_tmp_index" - > $trans +rm -f $trans"_tmp_index" + +#transcripList=$(find $dataDir -name "*.norm.txt" -type f | sort) +#for transcript in $transcripList; +#do +# echo "read: " $transcript +# cat $transcript | awk '{print $1;}' > tmp +# cat $transcript | awk '{$1="";print $0;}' | \ +# local/strip.py | \ +# #morfessor -l $lmDir/data/_lexicon_/mergedCorpus.model4.reduced -T - -o tmp2 --output-format '{analysis} ' --output-newlines +# morfessor -l $lmDir/zeroth_morfessor.seg -T - -o tmp2 --output-format '{analysis} ' --output-newlines +# #$lmDir/data/_lm_/seg2sentence.py tmp2 > tmp3 +# +# array=(${transcript//\./ }) +# echo "write: " ${array[0]}.${array[1]}.txt +# paste -d" " tmp tmp2 > ${array[0]}.${array[1]}.txt +#done +#rm -f tmp* diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh new file mode 100755 index 00000000000..48a3834050c --- /dev/null +++ b/egs/zeroth_korean/s5/run.sh @@ -0,0 +1,194 @@ +#!/bin/bash +# +# Based mostly on the WSJ/Librispeech recipe. The training database is #####, +# it consists of 51hrs korean speech with cleaned automatic transcripts: +# +# http://www.openslr.org/resources (Mirror). +# +# Copyright 2018 Atlas Guide (Author : Lucas Jo) +# 2018 Gridspace Inc. 
(Author: Wonkyum Lee) +# +# Apache 2.0 +# + +# Check list before start +# 1. locale setup +# 2. pre-installed package: awscli, Morfessor-2.0.1, flac, sox, same cuda library, unzip +# 3. pre-install or symbolic link for easy going: rirs_noises.zip (takes pretty long time) +# 4. parameters: nCPU, num_jobs_initial, num_jobs_final, --max-noises-per-minute + +db_dir=./db +nCPU=16 + +. ./cmd.sh +. ./path.sh + +# you might not want to do this for interactive shells. +set -e + +startTime=$(date +'%F-%H-%M') +echo "started at" $startTime + +# download the data. +local/download_and_untar.sh $db_dir + +# format the data as Kaldi data directories +for part in train_data_01 test_data_01; do + # use underscore-separated names in data directories. + local/data_prep.sh $db_dir/$part data/$(echo $part | sed s/-/_/g) +done + +# update segmentation of transcripts +for part in train_data_01 test_data_01; do + local/updateSegmentation.sh data/$part data/local/lm +done + +# prepare dictionary and language model +local/prepare_dict.sh data/local/lm data/local/dict_nosp + +utils/prepare_lang.sh data/local/dict_nosp \ + "" data/local/lang_tmp_nosp data/lang_nosp + +local/format_lms.sh --src-dir data/lang_nosp data/local/lm + +# Create ConstArpaLm format language model for full 3-gram and 4-gram LMs +# it takes long time and do this again after computing silence prob. +# you can do comment out here this time + +#utils/build_const_arpa_lm.sh data/local/lm/zeroth.lm.tg.arpa.gz \ +# data/lang_nosp data/lang_nosp_test_tglarge +#utils/build_const_arpa_lm.sh data/local/lm/zeroth.lm.fg.arpa.gz \ +# data/lang_nosp data/lang_nosp_test_fglarge + +# Feature extraction (MFCC) +mfccdir=mfcc +hostInAtlas="ares hephaestus jupiter neptune" +if [[ ! -z $(echo $hostInAtlas | grep -o $(hostname -f)) ]]; then + mfcc=$(basename mfccdir) # in case was absolute pathname (unlikely), get basename. 
+ utils/create_split_dir.pl /mnt/{ares,hephaestus,jupiter,neptune}/$USER/kaldi-data/zeroth-kaldi/s5/$mfcc/storage \ + $mfccdir/storage +fi +for part in train_data_01 test_data_01; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj $nCPU data/$part exp/make_mfcc/$part $mfccdir + steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir +done + +# ... and then combine data sets into one (for later extension) +utils/combine_data.sh \ + data/train_clean data/train_data_01 + +utils/combine_data.sh \ + data/test_clean data/test_data_01 + +# Make some small data subsets for early system-build stages. +utils/subset_data_dir.sh --shortest data/train_clean 2000 data/train_2kshort +utils/subset_data_dir.sh data/train_clean 5000 data/train_5k +utils/subset_data_dir.sh data/train_clean 10000 data/train_10k + +echo "#### Monophone Training ###########" +# train a monophone system & align +steps/train_mono.sh --boost-silence 1.25 --nj $nCPU --cmd "$train_cmd" \ + data/train_2kshort data/lang_nosp exp/mono +steps/align_si.sh --boost-silence 1.25 --nj $nCPU --cmd "$train_cmd" \ + data/train_5k data/lang_nosp exp/mono exp/mono_ali_5k + +echo "#### Triphone Training, delta + delta-delta ###########" +# train a first delta + delta-delta triphone system on a subset of 5000 utterancesa +# number of maximum pdf, gaussian (under/over fitting) +# recognition result +steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ + 2000 10000 data/train_5k data/lang_nosp exp/mono_ali_5k exp/tri1 +steps/align_si.sh --nj $nCPU --cmd "$train_cmd" \ + data/train_10k data/lang_nosp exp/tri1 exp/tri1_ali_10k + +echo "#### Triphone Training, LDA+MLLT ###########" +# train an LDA+MLLT system. 
+steps/train_lda_mllt.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" 2500 15000 \ + data/train_10k data/lang_nosp exp/tri1_ali_10k exp/tri2b + +# Align a 10k utts subset using the tri2b model +steps/align_si.sh --nj $nCPU --cmd "$train_cmd" --use-graphs true \ + data/train_clean data/lang_nosp exp/tri2b exp/tri2b_ali_train_clean + +echo "#### Triphone Training, LDA+MLLT+SAT ###########" +# Train tri3b, which is LDA+MLLT+SAT on 10k utts +#steps/train_sat.sh --cmd "$train_cmd" 3000 25000 \ +steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \ + data/train_clean data/lang_nosp exp/tri2b_ali_train_clean exp/tri3b + +# Now we compute the pronunciation and silence probabilities from training data, +# and re-create the lang directory. +# silence transition probability ... +steps/get_prons.sh --cmd "$train_cmd" \ + data/train_clean data/lang_nosp exp/tri3b + +utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_nosp \ + exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \ + exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict + +utils/prepare_lang.sh data/local/dict \ + "" data/local/lang_tmp data/lang + +local/format_lms.sh --src-dir data/lang data/local/lm + +utils/build_const_arpa_lm.sh \ + data/local/lm/zeroth.lm.tg.arpa.gz data/lang data/lang_test_tglarge +utils/build_const_arpa_lm.sh \ + data/local/lm/zeroth.lm.fg.arpa.gz data/lang data/lang_test_fglarge + +# align the entire train_clean using the tri3b model +steps/align_fmllr.sh --nj $nCPU --cmd "$train_cmd" \ + data/train_clean data/lang exp/tri3b exp/tri3b_ali_train_clean + +echo "#### SAT again on train_clean ###########" +# train another LDA+MLLT+SAT system on the entire subset +steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \ + data/train_clean data/lang exp/tri3b_ali_train_clean exp/tri4b + +# decode using the tri4b model with pronunciation and silence probabilities +utils/mkgraph.sh \ + data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall + +# 
the size is properly set? +utils/subset_data_dir.sh data/test_clean 200 data/test_200 + +for test in test_200; do + nspk=$(wc -l Date: Mon, 9 Jul 2018 20:01:47 -0700 Subject: [PATCH 03/26] cleaning script - simplified the script - delete unnecessary scripts and comments --- egs/zeroth_korean/s5/RESULTS | 24 -- egs/zeroth_korean/s5/conf/queue.conf | 10 - .../chain/multi_condition/run_tdnn_1b.sh | 299 ------------- .../chain/multi_condition/run_tdnn_1n.sh | 302 ------------- .../chain/multi_condition/run_tdnn_lstm_1e.sh | 328 --------------- egs/zeroth_korean/s5/local/chain/run_tdnn.sh | 1 + .../s5/local/chain/run_tdnn_1b.sh | 271 ------------ .../s5/local/chain/run_tdnn_1n.sh | 275 ------------ .../s5/local/chain/run_tdnn_lstm_1e.sh | 290 ------------- .../s5/local/chain/run_tdnn_opgru.sh | 1 + .../local/chain/{ => tuning}/run_tdnn_1a.sh | 87 ++-- .../run_tdnn_opgru_1a.sh} | 6 +- egs/zeroth_korean/s5/local/data_prep.sh | 10 +- .../s5/local/download_and_untar.sh | 2 +- .../s5/local/multi_condition/copy_ali_dir.sh | 78 ---- .../s5/local/nnet2/run_5a_recData01.sh | 76 ---- .../s5/local/nnet2/run_5a_train_2x.sh | 105 ----- .../s5/local/nnet2/run_5a_train_clean.sh | 77 ---- .../multi_condition/run_ivector_common.sh | 214 ---------- .../s5/local/nnet3/run_ivector_common.sh | 12 +- .../local/online/export_online_nnet2_model.sh | 33 -- .../s5/local/online/run_nnet2_common.sh | 101 ----- .../s5/local/online/run_nnet2_ms.sh | 267 ------------ .../s5/local/online/run_nnet2_ms_disc.sh | 164 -------- .../s5/local/updateSegmentation.sh | 19 +- egs/zeroth_korean/s5/run.sh | 397 +++++++++++------- 26 files changed, 297 insertions(+), 3152 deletions(-) delete mode 100644 egs/zeroth_korean/s5/RESULTS delete mode 100644 egs/zeroth_korean/s5/conf/queue.conf delete mode 100755 egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh delete mode 100755 egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1n.sh delete mode 100755 
egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_lstm_1e.sh create mode 120000 egs/zeroth_korean/s5/local/chain/run_tdnn.sh delete mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_1b.sh delete mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_1n.sh delete mode 100755 egs/zeroth_korean/s5/local/chain/run_tdnn_lstm_1e.sh create mode 120000 egs/zeroth_korean/s5/local/chain/run_tdnn_opgru.sh rename egs/zeroth_korean/s5/local/chain/{ => tuning}/run_tdnn_1a.sh (77%) rename egs/zeroth_korean/s5/local/chain/{run_tdnn_opgru_1c.sh => tuning/run_tdnn_opgru_1a.sh} (98%) delete mode 100755 egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh delete mode 100755 egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh delete mode 100755 egs/zeroth_korean/s5/local/nnet2/run_5a_train_2x.sh delete mode 100755 egs/zeroth_korean/s5/local/nnet2/run_5a_train_clean.sh delete mode 100755 egs/zeroth_korean/s5/local/nnet3/multi_condition/run_ivector_common.sh delete mode 100755 egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh delete mode 100755 egs/zeroth_korean/s5/local/online/run_nnet2_common.sh delete mode 100755 egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh delete mode 100755 egs/zeroth_korean/s5/local/online/run_nnet2_ms_disc.sh diff --git a/egs/zeroth_korean/s5/RESULTS b/egs/zeroth_korean/s5/RESULTS deleted file mode 100644 index 8a189e3f501..00000000000 --- a/egs/zeroth_korean/s5/RESULTS +++ /dev/null @@ -1,24 +0,0 @@ -# In the results below, "tgsmall" is the pruned 3-gram LM, which is used for lattice generation. 
-# The following language models are then used for rescoring: -# a) tglarge- the full, non-pruned 3-gram LM -# b) fglarge- non-pruned 4-gram LM -# The "test-clean" sets generally contain, relatively cleaner Korean speech, -# the "test_200" are subset of "test-clean", designed for quick evaluation - -### SAT GMM model trained on the "train-01" set (51 hours "clean" speech) -decode_fglarge_test_200/wer_14_0.5:%WER 21.17 [ 873 / 4124, 93 ins, 172 del, 608 sub ] -decode_tglarge_test_200/wer_15_0.0:%WER 21.46 [ 885 / 4124, 101 ins, 168 del, 616 sub ] -decode_tgsmall_test_200/wer_14_0.5:%WER 33.83 [ 1395 / 4124, 85 ins, 330 del, 980 sub ] -decode_tgsmall_test_200.si/wer_14_0.0:%WER 46.02 [ 1898 / 4124, 133 ins, 389 del, 1376 sub ] - -### Chain model trained on the "train-01" set -tdnn1n_online/decode_fglarge_test_200/wer_13_1.0:%WER 11.25 [ 464 / 4124, 65 ins, 78 del, 321 sub ] -tdnn1n_online/decode_tgsmall_test_200/wer_13_0.0:%WER 18.09 [ 746 / 4124, 89 ins, 123 del, 534 sub ] -tdnn_opgru_1c_sp_online/decode_fglarge_test_200/wer_8_1.0:%WER 9.00 [ 371 / 4124, 50 ins, 63 del, 258 sub ] -tdnn_opgru_1c_sp_online/decode_tgsmall_test_200/wer_8_0.5:%WER 14.06 [ 580 / 4124, 62 ins, 92 del, 426 sub ] - -### Chain model trained on the "train-01" set with multi-conditioned data augmentation -tdnn1n_rvb_online/decode_fglarge_test_200/wer_10_0.0:%WER 10.11 [ 417 / 4124, 73 ins, 57 del, 287 sub ] -tdnn1n_rvb_online/decode_tgsmall_test_200/wer_8_0.5:%WER 16.27 [ 671 / 4124, 87 ins, 91 del, 493 sub ] -tdnn_lstm_1e_rvb_online/decode_fglarge_test_200/wer_13_0.0:%WER 11.47 [ 473 / 4124, 74 ins, 61 del, 338 sub ] -tdnn_lstm_1e_rvb_online/decode_tgsmall_test_200/wer_12_1.0:%WER 16.97 [ 700 / 4124, 72 ins, 130 del, 498 sub ] diff --git a/egs/zeroth_korean/s5/conf/queue.conf b/egs/zeroth_korean/s5/conf/queue.conf deleted file mode 100644 index 2aa9ee6a211..00000000000 --- a/egs/zeroth_korean/s5/conf/queue.conf +++ /dev/null @@ -1,10 +0,0 @@ -# Default configuration -command qsub -v PATH -cwd 
-S /bin/bash -j y -l arch=*64* -option mem=* -l mem_free=$0,ram_free=$0 -option mem=0 # Do not add anything to qsub_opts -option num_threads=* -pe smp $0 -option num_threads=1 # Do not add anything to qsub_opts -option max_jobs_run=* -tc $0 -default gpu=0 -option gpu=0 -option gpu=* -l gpu=$0 diff --git a/egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh b/egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh deleted file mode 100755 index c8ebaeb2e05..00000000000 --- a/egs/zeroth_korean/s5/local/chain/multi_condition/run_tdnn_1b.sh +++ /dev/null @@ -1,299 +0,0 @@ -#!/bin/bash - -set -e -o pipefail - -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). -stage=0 -nj=30 -train_set=train_clean -num_data_reps=1 # number of reverberated copies of data to generate -speed_perturb=true -test_sets="test_200" -gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it - # should have alignments for the specified training data. -nnet3_affix=_rvb # affix for exp dirs, e.g. it was _cleaned in tedlium. - -# Options which are not passed through to run_ivector_common.sh -affix=1b_rvb #affix for TDNN directory e.g. "1a" or "1b", in case we change the configuration. -common_egs_dir= - -# LSTM/chain options -train_stage=-10 -xent_regularize=0.1 -max_param_change=2.0 - -# training chunk-options -chunk_width=150 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 - -# training options -srand=0 -num_jobs_initial=2 -num_jobs_final=12 -num_epochs=4 -minibatch_size=128 -initial_effective_lrate=0.001 -final_effective_lrate=0.0001 -remove_egs=true - - -#decode options -test_online_decoding=true # if true, it will run the last decoding stage. - -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! 
cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 9 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $clean_lat_dir - rm $clean_lat_dir/fsts.*.gz # save space - # Create the lattices for the reverberated data - - # We use the lattices/alignments from the clean data for the reverberated data. - mkdir -p $lat_dir/temp/ - lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp - - # copy the lattices for the reverberated data - rm -f $lat_dir/temp/combined_lats.scp - touch $lat_dir/temp/combined_lats.scp - # Here prefix "rev0_" represents the clean set, "rev1_" represents the reverberated set - for i in `seq 0 $num_data_reps`; do - cat $lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp - done - sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp - - lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; - echo "1" > $lat_dir/num_jobs - - # copy other files from original lattice dir - for f in cmvn_opts final.mdl splice_opts tree; do - cp $clean_lat_dir/$f $lat_dir/$f - done - -fi - -if [ $stage -le 10 ]; then - # Build a tree using our new topology. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ - $lang $ali_dir $tree_dir -fi - -if [ $stage -le 11 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-renorm-layer name=tdnn1 dim=512 - relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) - relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) - relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) - relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) - relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) - - ## adding the layers for chain branch - relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir=$train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.srand $srand \ - --trainer.max-param-change $max_param_change \ - --trainer.num-epochs $num_epochs \ - --trainer.frames-per-iter 1500000 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --egs.chunk-width $chunk_width \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --cleanup.remove-egs $remove_egs \ - 
--use-gpu true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 13 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/lang/check_phones_compatible.sh \ - data/lang_test_tgsmall/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ - $tree_dir $tree_dir/graph_tgsmall || exit 1; -fi - -if $test_online_decoding && [ $stage -le 14 ]; then - # note: if the features change (e.g. you add pitch features), you will have to - # change the options of the following command line. - steps/online/nnet3/prepare_online_decoding.sh \ - --mfcc-config conf/mfcc_hires.conf \ - $lang exp/nnet3_rvb/extractor ${dir} ${dir}_online - - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - data_affix=$(echo $data | sed s/test_//) - nspk=$(wc -l $lang/topo - fi -fi - -if [ $stage -le 9 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $clean_lat_dir - rm $clean_lat_dir/fsts.*.gz # save space - # Create the lattices for the reverberated data - - # We use the lattices/alignments from the clean data for the reverberated data. 
- mkdir -p $lat_dir/temp/ - lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp - - # copy the lattices for the reverberated data - rm -f $lat_dir/temp/combined_lats.scp - touch $lat_dir/temp/combined_lats.scp - # Here prefix "rev0_" represents the clean set, "rev1_" represents the reverberated set - for i in `seq 0 $num_data_reps`; do - cat $lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp - done - sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp - - lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; - echo "1" > $lat_dir/num_jobs - - # copy other files from original lattice dir - for f in cmvn_opts final.mdl splice_opts tree; do - cp $clean_lat_dir/$f $lat_dir/$f - done - -fi - -if [ $stage -le 10 ]; then - # Build a tree using our new topology. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ - $lang $ali_dir $tree_dir -fi - -if [ $stage -le 11 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - opts="l2-regularize=0.002" - linear_opts="orthonormal-constraint=1.0" - output_opts="l2-regularize=0.0005 bottleneck-dim=256" - - mkdir -p $dir/configs - - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 $opts dim=1280 - linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) - relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 - linear-component name=tdnn3l dim=256 $linear_opts - relu-batchnorm-layer name=tdnn3 $opts dim=1280 - linear-component name=tdnn4l dim=256 $linear_opts input=Append(-1,0) - relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 - linear-component name=tdnn5l dim=256 $linear_opts - relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) - linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn7l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 - 
linear-component name=tdnn8l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 - linear-component name=tdnn10l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn8l,tdnn6l) dim=1280 - linear-component name=prefinal-l dim=256 $linear_opts - - relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280 - output-layer name=output include-log-softmax=false dim=$num_targets $output_opts - - relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir=$train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.0 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.max-param-change $max_param_change \ - --trainer.num-epochs $num_epochs \ - --trainer.frames-per-iter 1500000 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --egs.stage $get_egs_stage \ - --egs.chunk-width $chunk_width \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --cleanup.remove-egs $remove_egs \ - --use-gpu true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 13 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. 
- - utils/lang/check_phones_compatible.sh \ - data/lang_test_tgsmall/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ - $tree_dir $tree_dir/graph_tgsmall || exit 1; -fi - -if $test_online_decoding && [ $stage -le 14 ]; then - # note: if the features change (e.g. you add pitch features), you will have to - # change the options of the following command line. - steps/online/nnet3/prepare_online_decoding.sh \ - --mfcc-config conf/mfcc_hires.conf \ - $lang exp/nnet3_rvb/extractor ${dir} ${dir}_online - - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - data_affix=$(echo $data | sed s/test_//) - nspk=$(wc -l $lang/topo - fi -fi - -if [ $stage -le 9 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $clean_lat_dir - rm $clean_lat_dir/fsts.*.gz # save space - # Create the lattices for the reverberated data - - # We use the lattices/alignments from the clean data for the reverberated data. 
- mkdir -p $lat_dir/temp/ - lattice-copy "ark:gunzip -c $clean_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp - - # copy the lattices for the reverberated data - rm -f $lat_dir/temp/combined_lats.scp - touch $lat_dir/temp/combined_lats.scp - # Here prefix "rev0_" represents the clean set, "rev1_" represents the reverberated set - for i in `seq 0 $num_data_reps`; do - cat $lat_dir/temp/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp - done - sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp - - lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; - echo "1" > $lat_dir/num_jobs - - # copy other files from original lattice dir - for f in cmvn_opts final.mdl splice_opts tree; do - cp $clean_lat_dir/$f $lat_dir/$f - done - -fi - -if [ $stage -le 10 ]; then - # Build a tree using our new topology. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ - $lang $ali_dir $tree_dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=20" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-renorm-layer name=tdnn1 dim=1024 - relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 - relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 - - # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults - fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts - relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 - fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts - relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 - fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts - - ## adding the layers for 
chain branch - output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ - -fi - - -if [ $stage -le 12 ]; then - - hostInAtlas="ares hephaestus jupiter neptune" - if [[ ! -z $(echo $hostInAtlas | grep -o $(hostname -f)) ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl /mnt/{ares,hephaestus,jupiter,neptune}/$USER/kaldi-data/zeroth-kaldi-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage \ - $dir/egs/storage - fi - #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - # utils/create_split_dir.pl \ - # /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - #fi - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir=$train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change $max_param_change \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_chunk \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 13 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). 
So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/lang/check_phones_compatible.sh \ - data/lang_test_tgsmall/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ - $tree_dir $tree_dir/graph_tgsmall || exit 1; -fi - -if $test_online_decoding && [ $stage -le 14 ]; then - # note: if the features change (e.g. you add pitch features), you will have to - # change the options of the following command line. - steps/online/nnet3/prepare_online_decoding.sh \ - --mfcc-config conf/mfcc_hires.conf \ - $lang exp/nnet3_rvb/extractor ${dir} ${dir}_online - - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - data_affix=$(echo $data | sed s/test_//) - nspk=$(wc -l $lang/topo - fi -fi - -if [ $stage -le 9 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 10 ]; then - # Build a tree using our new topology. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ - $lang $ali_dir $tree_dir -fi - -if [ $stage -le 11 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-renorm-layer name=tdnn1 dim=512 - relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) - relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) - relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) - relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) - relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) - - ## adding the layers for chain branch - relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir=$train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.srand $srand \ - --trainer.max-param-change $max_param_change \ - --trainer.num-epochs $num_epochs \ - --trainer.frames-per-iter 1500000 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --egs.chunk-width $chunk_width \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --cleanup.remove-egs $remove_egs \ - 
--use-gpu true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 13 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/lang/check_phones_compatible.sh \ - data/lang_test_tgsmall/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ - $tree_dir $tree_dir/graph_tgsmall || exit 1; -fi - -if $test_online_decoding && [ $stage -le 14 ]; then - # note: if the features change (e.g. you add pitch features), you will have to - # change the options of the following command line. - steps/online/nnet3/prepare_online_decoding.sh \ - --mfcc-config conf/mfcc_hires.conf \ - $lang exp/nnet3/extractor ${dir} ${dir}_online - - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - data_affix=$(echo $data | sed s/test_//) - nspk=$(wc -l $lang/topo - fi -fi - -if [ $stage -le 9 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 10 ]; then - # Build a tree using our new topology. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ - $lang $ali_dir $tree_dir -fi - -if [ $stage -le 11 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - opts="l2-regularize=0.002" - linear_opts="orthonormal-constraint=1.0" - output_opts="l2-regularize=0.0005 bottleneck-dim=256" - - mkdir -p $dir/configs - - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 $opts dim=1280 - linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) - relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 - linear-component name=tdnn3l dim=256 $linear_opts - relu-batchnorm-layer name=tdnn3 $opts dim=1280 - linear-component name=tdnn4l dim=256 $linear_opts input=Append(-1,0) - relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 - linear-component name=tdnn5l dim=256 $linear_opts - relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) - linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn7l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 - 
linear-component name=tdnn8l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 - linear-component name=tdnn10l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn8l,tdnn6l) dim=1280 - linear-component name=prefinal-l dim=256 $linear_opts - - relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280 - output-layer name=output include-log-softmax=false dim=$num_targets $output_opts - - relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir=$train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.0 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.max-param-change $max_param_change \ - --trainer.num-epochs $num_epochs \ - --trainer.frames-per-iter 1500000 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --egs.stage $get_egs_stage \ - --egs.chunk-width $chunk_width \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --cleanup.remove-egs $remove_egs \ - --use-gpu true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 13 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. 
- - utils/lang/check_phones_compatible.sh \ - data/lang_test_tgsmall/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ - $tree_dir $tree_dir/graph_tgsmall || exit 1; -fi - -if $test_online_decoding && [ $stage -le 14 ]; then - # note: if the features change (e.g. you add pitch features), you will have to - # change the options of the following command line. - steps/online/nnet3/prepare_online_decoding.sh \ - --mfcc-config conf/mfcc_hires.conf \ - $lang exp/nnet3/extractor ${dir} ${dir}_online - - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - data_affix=$(echo $data | sed s/test_//) - nspk=$(wc -l $lang/topo - fi -fi - -if [ $stage -le 9 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 10 ]; then - # Build a tree using our new topology. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
- exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ - $lang $ali_dir $tree_dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') - [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=20" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-renorm-layer name=tdnn1 dim=1024 - relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 - relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 - - # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults - fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts - relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 - fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts - relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 - fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts - - ## adding the layers for 
chain branch - output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ - -fi - - -if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir=$train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change $max_param_change \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_chunk \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 13 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). 
So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/lang/check_phones_compatible.sh \ - data/lang_test_tgsmall/phones.txt $lang/phones.txt - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ - $tree_dir $tree_dir/graph_tgsmall || exit 1; -fi - -if $test_online_decoding && [ $stage -le 14 ]; then - # note: if the features change (e.g. you add pitch features), you will have to - # change the options of the following command line. - steps/online/nnet3/prepare_online_decoding.sh \ - --mfcc-config conf/mfcc_hires.conf \ - $lang exp/nnet3/extractor ${dir} ${dir}_online - - rm $dir/.error 2>/dev/null || true - - for data in $test_sets; do - ( - data_affix=$(echo $data | sed s/test_//) - nspk=$(wc -l $dir/configs/network.xconfig input dim=100 name=ivector input dim=40 name=input @@ -150,31 +155,37 @@ if [ $stage -le 11 ]; then # please note that it is important to have input layer with the name=input # as the layer immediately preceding the fixed-affine-layer to enable # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat # the first splicing is moved before the lda layer, so no splicing here - relu-renorm-layer name=tdnn1 dim=512 - relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) - relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) - relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) - relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) - relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) - - ## adding the layers for chain branch - relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - 
- # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + relu-batchnorm-layer name=tdnn1 $opts dim=1280 + linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn3l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn3 $opts dim=1280 + linear-component name=tdnn4l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn5l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) + linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn7l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 + linear-component name=tdnn8l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 + linear-component name=tdnn10l dim=256 $linear_opts input=Append(-3,0) + 
relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn8l,tdnn6l) dim=1280 + linear-component name=prefinal-l dim=256 $linear_opts + + relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi @@ -192,10 +203,9 @@ if [ $stage -le 12 ]; then --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ --chain.xent-regularize $xent_regularize \ --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ + --chain.l2-regularize 0.0 \ --chain.apply-deriv-weights false \ --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.srand $srand \ --trainer.max-param-change $max_param_change \ --trainer.num-epochs $num_epochs \ --trainer.frames-per-iter 1500000 \ @@ -204,9 +214,8 @@ if [ $stage -le 12 ]; then --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ --trainer.optimization.final-effective-lrate $final_effective_lrate \ --trainer.num-chunk-per-minibatch $minibatch_size \ + --egs.stage $get_egs_stage \ --egs.chunk-width $chunk_width \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ --egs.dir "$common_egs_dir" \ --egs.opts "--frames-overlap-per-eg 0" \ --cleanup.remove-egs $remove_egs \ diff --git a/egs/zeroth_korean/s5/local/chain/run_tdnn_opgru_1c.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh similarity index 98% rename from egs/zeroth_korean/s5/local/chain/run_tdnn_opgru_1c.sh rename to 
egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 6b0817c3b37..e0404cd3d7c 100755 --- a/egs/zeroth_korean/s5/local/chain/run_tdnn_opgru_1c.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -8,13 +8,13 @@ stage=0 nj=30 train_set=train_clean speed_perturb=true -test_sets="test_200" -gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it +test_sets="test_clean" +gmm=tri4 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. # Options which are not passed through to run_ivector_common.sh -affix=1c #affix for TDNN directory e.g. "1a" or "1b", in case we change the configuration. +affix=1a #affix for TDNN directory e.g. "1a" or "1b", in case we change the configuration. common_egs_dir= # OPGRU/chain options diff --git a/egs/zeroth_korean/s5/local/data_prep.sh b/egs/zeroth_korean/s5/local/data_prep.sh index 723028afb35..5e6a7d02ce6 100755 --- a/egs/zeroth_korean/s5/local/data_prep.sh +++ b/egs/zeroth_korean/s5/local/data_prep.sh @@ -1,14 +1,14 @@ #!/bin/bash -# Copyright 2014 Vassil Panayotov -# 2014 Johns Hopkins University (author: Daniel Povey) +# Copyright 2018 Atlas Guide (Author : Lucas Jo) +# 2018 Gridspace Inc. (Author: Wonkyum Lee) # Apache 2.0 # Modified by Lucas Jo 2017 (Altas Guide) if [ "$#" -ne 2 ]; then echo "Usage: $0 " - echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean" + echo "e.g.: $0 ./db/train_data_01 data/train_data_01" exit 1 fi @@ -63,9 +63,7 @@ for scriptid_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do [ ! -f $reader_trans ] && echo "$0: expected file $reader_trans to exist" && exit 1 cat $reader_trans >>$trans - # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered - # to be a different speaker. This is done for simplicity and because we want - # e.g. 
the CMVN to be calculated per-chapter + # NOTE: Each chapter is dedicated to each speaker. awk -v "reader=$reader" -v "scriptid=$scriptid" '{printf "%s %s_%s\n", $1, reader, scriptid}' \ <$reader_trans >>$utt2spk || exit 1 diff --git a/egs/zeroth_korean/s5/local/download_and_untar.sh b/egs/zeroth_korean/s5/local/download_and_untar.sh index 0b56bcb37b3..2e62a3273d4 100755 --- a/egs/zeroth_korean/s5/local/download_and_untar.sh +++ b/egs/zeroth_korean/s5/local/download_and_untar.sh @@ -26,7 +26,7 @@ if [ ! -f $dir/db.tar.gz ]; then if [ ! -d $dir ]; then mkdir -p $dir fi - wget -O $dir/db.tar.gz https://storage.googleapis.com/zeroth_project/zeroth_korean.tar.gz + wget -O $dir/db.tar.gz http://www.openslr.org/resources/40/zeroth_korean.tar.gz else echo " $dir/db.tar.gz already exist" fi diff --git a/egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh b/egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh deleted file mode 100755 index 42ea2dc4b9d..00000000000 --- a/egs/zeroth_korean/s5/local/multi_condition/copy_ali_dir.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -# Copyright 2014 Johns Hopkins University (author: Vijayaditya Peddinti) -# Apache 2.0 - -# This script operates on a directory, such as in exp/tri4a_ali, -# that contains some subset of the following files: -# ali.*.gz -# tree -# cmvn_opts -# splice_opts -# num_jobs -# final.mdl -# It copies to another directory, possibly adding a specified prefix or a suffix -# to the utterance names. - - -# begin configuration section -utt_prefix= -utt_suffix= -cmd=run.pl -# end configuration section - -. utils/parse_options.sh - -if [ $# != 2 ]; then - echo "Usage: " - echo " $0 [options] " - echo "e.g.:" - echo " $0 --utt-prefix=1- exp/tri4a_ali exp/tri4a_rev1_ali" - echo "Options" - echo " --utt-prefix= # Prefix for utterance ids, default empty" - echo " --utt-suffix= # Suffix for utterance ids, default empty" - exit 1; -fi - - -export LC_ALL=C - -src_dir=$1 -dest_dir=$2 - -mkdir -p $dest_dir - -if [ ! 
-f $src_dir/ali.1.gz ]; then - echo "copy_ali_dir.sh: no such files $src_dir/ali.*.gz" - exit 1; -fi - -for f in tree cmvn_opts splice_opts num_jobs final.mdl; do - if [ ! -f $src_dir/$f ]; then - echo "copy_ali_dir.sh: no such file $src_dir/$f this might be serious error." - continue - fi - cp $src_dir/$f $dest_dir/ -done - -nj=$(cat $dest_dir/num_jobs) -mkdir -p $dest_dir/temp -cat << EOF > $dest_dir/temp/copy_ali.sh -set -e; -id=\$1 -echo "$src_dir/ali.\$id.gz" -gunzip -c $src_dir/ali.\$id.gz | \ - copy-int-vector ark:- ark,t:- | \ -python -c " -import sys -for line in sys.stdin: - parts = line.split() - print '$utt_prefix{0}$utt_suffix {1}'.format(parts[0], ' '.join(parts[1:])) -" | \ - gzip -c >$dest_dir/ali.\$id.gz || exit 1; -set +o pipefail; # unset the pipefail option. -EOF -chmod +x $dest_dir/temp/copy_ali.sh -$cmd -v PATH JOB=1:$nj $dest_dir/temp/copy_ali.JOB.log $dest_dir/temp/copy_ali.sh JOB || exit 1; - -echo "$0: copied alignments from $src_dir to $dest_dir" diff --git a/egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh b/egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh deleted file mode 100755 index c7e563906c6..00000000000 --- a/egs/zeroth_korean/s5/local/nnet2/run_5a_recData01.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -# This is p-norm neural net training, with the "fast" script, on top of adapted -# 40-dimensional features. - -# Modified by Lucas Jo 2017 (Altas Guide) - - -train_stage=-10 -use_gpu=true - -. cmd.sh -. ./path.sh -. utils/parse_options.sh - - -if $use_gpu; then - if ! 
cuda-compiled; then - cat < $from/reco2dur - - if [ -f $to/utt2dur ] ; then - rm $to/uttdur - fi - for i in `seq 0 ${num_data_reps}`; do - cat $from/reco2dur | sed -e "s/^/rev${i}_/" >> $to/utt2dur - done - ### - - - for datadir in ${trainset}_rvb${num_data_reps} ; do - steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ - --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; - steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; - done - - # copy the alignments for the newly created utterance ids - ali_dirs= - for i in `seq 0 $num_data_reps`; do - local/multi_condition/copy_ali_dir.sh --cmd "$decode_cmd" --utt-prefix "rev${i}_" ${gmmdir}_ali_${trainset} ${gmmdir}_ali_${trainset}_temp_$i || exit 1; - ali_dirs+=" ${gmmdir}_ali_${trainset}_temp_$i" - done - steps/combine_ali_dirs.sh data/${trainset}_rvb${num_data_reps} ${gmmdir}_ali_${trainset}_rvb $ali_dirs || exit 1; - - # We need to build a small system just because we need the LDA+MLLT transform - # to train the diag-UBM on top of. We align a subset of training data for - # this purpose. - utils/subset_data_dir.sh data/${trainset}_rvb${num_data_reps}_hires 100000 data/train_100k_hires - utils/subset_data_dir.sh data/${trainset}_rvb${num_data_reps}_hires 30000 data/train_30k_hires -fi - - -if [ $stage -le 4 ]; then - # Train a small system just for its LDA+MLLT transform. We use --num-iters 13 - # because after we get the transform (12th iter is the last), any further - # training is pointless. 
- - mkdir exp -p exp/nnet3${rvb_affix} - - steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ - --realign-iters "" \ - --splice-opts "--left-context=3 --right-context=3" \ - 3000 10000 data/train_100k_hires data/lang_nosp \ - ${gmmdir}_ali_${trainset}_rvb exp/nnet3${rvb_affix}/tri2b -fi - - -if [ $stage -le 5 ]; then - # To train a diagonal UBM we don't need very much data, so use a small subset - # (actually, it's not that small: still around 100 hours). - steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj $nj --num_threads $maxThread --num-frames 700000 \ - data/train_30k_hires 512 exp/nnet3${rvb_affix}/tri2b exp/nnet3${rvb_affix}/diag_ubm -fi - -if [ $stage -le 6 ]; then - # iVector extractors can in general be sensitive to the amount of data, but - # this one has a fairly small dim (defaults to 100) so we don't use all of it, - # we use just the 3k subset (about one fifth of the data, or 200 hours). - steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ - data/${trainset}_rvb${num_data_reps}_hires exp/nnet3${rvb_affix}/diag_ubm exp/nnet3${rvb_affix}/extractor || exit 1; -fi - -if [ $stage -le 7 ]; then - ivectordir=exp/nnet3${rvb_affix}/ivectors_${trainset}_rvb${num_data_reps}_hires - - # We extract iVectors on all the train data, which will be what we train the - # system on. With --utts-per-spk-max 2, the script. pairs the utterances - # into twos, and treats each of these pairs as one speaker. Note that these - # are extracted 'online'. - - # having a larger number of speakers is helpful for generalization, and to - # handle per-utterance decoding well (iVector starts at zero). 
- utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ - data/${trainset}_rvb${num_data_reps}_hires data/${trainset}_rvb${num_data_reps}_hires_max2 - - steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ - data/${trainset}_rvb${num_data_reps}_hires_max2 exp/nnet3${rvb_affix}/extractor $ivectordir || exit 1; -fi - - -exit 0; diff --git a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh index 116070ab50b..b3b60629a8c 100755 --- a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh +++ b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh @@ -5,7 +5,7 @@ stage=0 -gmmdir=exp/tri4b +gmmdir=exp/tri4 speed_perturb=false trainset=train_clean @@ -55,9 +55,6 @@ if [ $stage -le 3 ]; then # have multiple copies of Kaldi checked out and run the same recipe, not to let # them overwrite each other. mfccdir=mfcc_hires - #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then - # utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage - #fi for datadir in ${trainset} ; do utils/copy_data_dir.sh data/$datadir data/${datadir}_hires @@ -84,7 +81,7 @@ if [ $stage -le 4 ]; then --realign-iters "" \ --splice-opts "--left-context=3 --right-context=3" \ 3000 10000 data/${trainset}_hires data/lang_nosp \ - ${gmmdir}_ali_${trainset} exp/nnet3/tri2b + ${gmmdir}_ali_${trainset} exp/nnet3/tri2 fi @@ -92,13 +89,10 @@ if [ $stage -le 5 ]; then # To train a diagonal UBM we don't need very much data, so use a small subset # (actually, it's not that small: still around 100 hours). 
steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 700000 \ - data/train_30k_hires 512 exp/nnet3/tri2b exp/nnet3/diag_ubm + data/train_30k_hires 512 exp/nnet3/tri2 exp/nnet3/diag_ubm fi if [ $stage -le 6 ]; then - # iVector extractors can in general be sensitive to the amount of data, but - # this one has a fairly small dim (defaults to 100) so we don't use all of it, - # we use just the 3k subset (about one fifth of the data, or 200 hours). steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ data/${trainset}_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1; fi diff --git a/egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh b/egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh deleted file mode 100755 index a9b4a61c6d2..00000000000 --- a/egs/zeroth_korean/s5/local/online/export_online_nnet2_model.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Lucas Jo (Atlas Guide) -# Apache 2.0 - -if [ $# -ne "1" ]; then - echo "Usage: $0 " - echo "e.g.: $0 ./export" - exit 1 -fi - -tardir=$1 -srcdir=exp/nnet2_online/nnet_ms_a_online -graphdir=exp/tri5b/graph_tgsmall -oldlang=data/lang_test_tgsmall -newlang=data/lang_test_fglarge -oldlm=$oldlang/G.fst -newlm=$newlang/G.carpa -symtab=$newlang/words.txt - -for f in $srcdir/final.mdl $symtab $graphdir/HCLG.fst $srcdir/conf/mfcc.conf \ - $srcdir/conf/ivector_extractor.conf $oldlm $newlm; do - [ ! 
-f $f ] && echo "export_model.sh: no such file $f" && exit 1; -done - -mkdir -p $tardir/conf -cp -rpf $srcdir/final.mdl $tardir/final.mdl # acoustic model -cp -rpf $symtab $tardir/words.txt # word symbol table -cp -rpf $graphdir/HCLG.fst $tardir/HCLG.fst # HCLG -cp -rpf $srcdir/conf/mfcc.conf $tardir/conf/mfcc.conf -cp -rpf $srcdir/conf/ivector_extractor.conf $tardir/conf/ivector_extractor.conf -cp -rpf $oldlm $tardir/G.fst -cp -rpf $newlm $tardir/G.carpa diff --git a/egs/zeroth_korean/s5/local/online/run_nnet2_common.sh b/egs/zeroth_korean/s5/local/online/run_nnet2_common.sh deleted file mode 100755 index d1ac0a2f5d2..00000000000 --- a/egs/zeroth_korean/s5/local/online/run_nnet2_common.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash - -# this script contains some common (shared) parts of the run_nnet*.sh scripts. -# Modified by Lucas Jo 2017 (Altas Guide) -. cmd.sh - - -stage=0 - -set -e -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if [ $stage -le 1 ]; then - # Create high-resolution MFCC features (with 40 cepstra instead of 13). - # this shows how you can split across multiple file-systems. we'll split the - # MFCC dir across multiple locations. You might want to be careful here, if you - # have multiple copies of Kaldi checked out and run the same recipe, not to let - # them overwrite each other. - mfccdir=mfcc - #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then - # utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage - #fi - - for datadir in train_2x; do - utils/copy_data_dir.sh data/$datadir data/${datadir}_hires - steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \ - --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; - steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; - done - - # now create some data subsets. - # mixed is the clean+other data. 
- # a is 1/5 of the data, b is 2/5th of it. - utils/subset_data_dir.sh data/train_2x_hires 3000 data/train_mixed_hires_a - utils/subset_data_dir.sh data/train_2x_hires 6000 data/train_mixed_hires_b -fi - -if [ $stage -le 2 ]; then - # We need to build a small system just because we need the LDA+MLLT transform - # to train the diag-UBM on top of. We align a subset of training data for - # this purpose. - utils/subset_data_dir.sh --utt-list <(awk '{print $1}' data/train_mixed_hires_a/utt2spk) \ - data/train_2x data/train_2x_a - - steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \ - data/train_2x_a data/lang exp/tri5b exp/nnet2_online/tri5b_ali_a -fi - -if [ $stage -le 3 ]; then - # Train a small system just for its LDA+MLLT transform. We use --num-iters 13 - # because after we get the transform (12th iter is the last), any further - # training is pointless. - #5000 10000 data/train_mixed_hires_a data/lang \ - steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ - --realign-iters "" \ - --splice-opts "--left-context=3 --right-context=3" \ - 3000 20000 data/train_mixed_hires_a data/lang \ - exp/nnet2_online/tri5b_ali_a exp/nnet2_online/tri6b -fi - - -if [ $stage -le 4 ]; then - mkdir -p exp/nnet2_online - # To train a diagonal UBM we don't need very much data, so use a small subset - # (actually, it's not that small: still around 100 hours). - steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 400000 \ - data/train_mixed_hires_a 256 exp/nnet2_online/tri6b exp/nnet2_online/diag_ubm -fi - -if [ $stage -le 5 ]; then - # iVector extractors can in general be sensitive to the amount of data, but - # this one has a fairly small dim (defaults to 100) so we don't use all of it, - # we use just the 3k subset (about one fifth of the data, or 200 hours). 
- steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ - data/train_mixed_hires_b exp/nnet2_online/diag_ubm exp/nnet2_online/extractor || exit 1; -fi - -if [ $stage -le 6 ]; then - ivectordir=exp/nnet2_online/ivectors_train_2x_hires - #if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then - # utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage - #fi - - # We extract iVectors on all the train data, which will be what we train the - # system on. With --utts-per-spk-max 2, the script. pairs the utterances - # into twos, and treats each of these pairs as one speaker. Note that these - # are extracted 'online'. - - # having a larger number of speakers is helpful for generalization, and to - # handle per-utterance decoding well (iVector starts at zero). - steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_2x_hires data/train_2x_hires_max2 - - steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 60 \ - data/train_2x_hires_max2 exp/nnet2_online/extractor $ivectordir || exit 1; -fi - - -exit 0; diff --git a/egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh b/egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh deleted file mode 100755 index d46e2f63667..00000000000 --- a/egs/zeroth_korean/s5/local/online/run_nnet2_ms.sh +++ /dev/null @@ -1,267 +0,0 @@ -#!/bin/bash - -# This is the "multi-splice" version of the online-nnet2 training script. -# It's currently the best recipe. -# You'll notice that we splice over successively larger windows as we go deeper -# into the network. - -# Modified by Lucas Jo 2017 (Altas Guide) - -. cmd.sh - - -stage=0 -train_stage=-10 -use_gpu=true -dir=exp/nnet2_online/nnet_ms_a -exit_train_stage=-100 - -set -e -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if $use_gpu; then - if ! 
cuda-compiled; then - cat < $trans rm -f $trans"_tmp_index" -#transcripList=$(find $dataDir -name "*.norm.txt" -type f | sort) -#for transcript in $transcripList; -#do -# echo "read: " $transcript -# cat $transcript | awk '{print $1;}' > tmp -# cat $transcript | awk '{$1="";print $0;}' | \ -# local/strip.py | \ -# #morfessor -l $lmDir/data/_lexicon_/mergedCorpus.model4.reduced -T - -o tmp2 --output-format '{analysis} ' --output-newlines -# morfessor -l $lmDir/zeroth_morfessor.seg -T - -o tmp2 --output-format '{analysis} ' --output-newlines -# #$lmDir/data/_lm_/seg2sentence.py tmp2 > tmp3 -# -# array=(${transcript//\./ }) -# echo "write: " ${array[0]}.${array[1]}.txt -# paste -d" " tmp tmp2 > ${array[0]}.${array[1]}.txt -#done -#rm -f tmp* diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index 48a3834050c..033366f81b2 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh @@ -1,9 +1,8 @@ #!/bin/bash # -# Based mostly on the WSJ/Librispeech recipe. The training database is #####, -# it consists of 51hrs korean speech with cleaned automatic transcripts: -# -# http://www.openslr.org/resources (Mirror). +# Based mostly on the WSJ/Librispeech recipe. +# The training/testing database is described in http://www.openslr.org/40/ +# This corpus consists of 51hrs korean speech with cleaned automatic transcripts: # # Copyright 2018 Atlas Guide (Author : Lucas Jo) # 2018 Gridspace Inc. (Author: Wonkyum Lee) @@ -12,13 +11,17 @@ # # Check list before start -# 1. locale setup -# 2. pre-installed package: awscli, Morfessor-2.0.1, flac, sox, same cuda library, unzip -# 3. pre-install or symbolic link for easy going: rirs_noises.zip (takes pretty long time) -# 4. parameters: nCPU, num_jobs_initial, num_jobs_final, --max-noises-per-minute +# 1. locale setup (see egs/zeroth_korean/s5/path.sh; you need this "export LC_ALL=ko_KR.UTF-8" ) +# 2. 
required software: Morfessor-2.0.1 (see tools/extras/install_morfessor.sh) +stage=0 db_dir=./db -nCPU=16 +nj=16 + +chain_train=true +decode=true # set false if you don't want to decode each GMM model +decode_rescoring=true # set false if you don't want to rescore with large language model +test_set="test_clean" . ./cmd.sh . ./path.sh @@ -26,169 +29,237 @@ nCPU=16 # you might not want to do this for interactive shells. set -e -startTime=$(date +'%F-%H-%M') -echo "started at" $startTime +if [ $stage -le 0 ]; then + # download the data. + local/download_and_untar.sh $db_dir +fi -# download the data. -local/download_and_untar.sh $db_dir +if [ $stage -le 1 ]; then + # format the data as Kaldi data directories + for part in train_data_01 test_data_01; do + # use underscore-separated names in data directories. + local/data_prep.sh $db_dir/$part data/$part + done +fi -# format the data as Kaldi data directories -for part in train_data_01 test_data_01; do - # use underscore-separated names in data directories. 
- local/data_prep.sh $db_dir/$part data/$(echo $part | sed s/-/_/g) -done +if [ $stage -le 2 ]; then + # update segmentation of transcripts + for part in train_data_01 test_data_01; do + local/updateSegmentation.sh data/$part data/local/lm + done +fi -# update segmentation of transcripts -for part in train_data_01 test_data_01; do - local/updateSegmentation.sh data/$part data/local/lm -done +if [ $stage -le 3 ]; then + # prepare dictionary and language model + local/prepare_dict.sh data/local/lm data/local/dict_nosp + + utils/prepare_lang.sh data/local/dict_nosp \ + "" data/local/lang_tmp_nosp data/lang_nosp +fi -# prepare dictionary and language model -local/prepare_dict.sh data/local/lm data/local/dict_nosp +if [ $stage -le 4 ]; then + # build testing language model + local/format_lms.sh --src-dir data/lang_nosp data/local/lm + + # re-scoring language model + if $decode_rescoring ; then + utils/build_const_arpa_lm.sh data/local/lm/zeroth.lm.tg.arpa.gz \ + data/lang_nosp data/lang_nosp_test_tglarge + utils/build_const_arpa_lm.sh data/local/lm/zeroth.lm.fg.arpa.gz \ + data/lang_nosp data/lang_nosp_test_fglarge + fi +fi -utils/prepare_lang.sh data/local/dict_nosp \ - "" data/local/lang_tmp_nosp data/lang_nosp -local/format_lms.sh --src-dir data/lang_nosp data/local/lm +if [ $stage -le 5 ]; then + # Feature extraction (MFCC) + mfccdir=mfcc + for part in train_data_01 test_data_01; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj data/$part exp/make_mfcc/$part $mfccdir + steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir + done + + # ... and then combine data sets into one (for later extension) + utils/combine_data.sh \ + data/train_clean data/train_data_01 + + utils/combine_data.sh \ + data/test_clean data/test_data_01 + + # Make some small data subsets for early system-build stages. 
+ utils/subset_data_dir.sh --shortest data/train_clean 2000 data/train_2kshort + utils/subset_data_dir.sh data/train_clean 5000 data/train_5k + utils/subset_data_dir.sh data/train_clean 10000 data/train_10k +fi -# Create ConstArpaLm format language model for full 3-gram and 4-gram LMs -# it takes long time and do this again after computing silence prob. -# you can do comment out here this time +if [ $stage -le 5 ]; then + echo "#### Monophone Training ###########" + # train a monophone system & align + steps/train_mono.sh --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ + data/train_2kshort data/lang_nosp exp/mono + if $decode; then + utils/mkgraph.sh data/lang_nosp_test_tgsmall exp/mono exp/mono/graph_nosp_tgsmall + nspk=$(wc -l " data/local/lang_tmp data/lang - -local/format_lms.sh --src-dir data/lang data/local/lm - -utils/build_const_arpa_lm.sh \ - data/local/lm/zeroth.lm.tg.arpa.gz data/lang data/lang_test_tglarge -utils/build_const_arpa_lm.sh \ - data/local/lm/zeroth.lm.fg.arpa.gz data/lang data/lang_test_fglarge - -# align the entire train_clean using the tri3b model -steps/align_fmllr.sh --nj $nCPU --cmd "$train_cmd" \ - data/train_clean data/lang exp/tri3b exp/tri3b_ali_train_clean - -echo "#### SAT again on train_clean ###########" -# train another LDA+MLLT+SAT system on the entire subset -steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \ - data/train_clean data/lang exp/tri3b_ali_train_clean exp/tri4b - -# decode using the tri4b model with pronunciation and silence probabilities -utils/mkgraph.sh \ - data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall - -# the size is properly set? 
-utils/subset_data_dir.sh data/test_clean 200 data/test_200 - -for test in test_200; do - nspk=$(wc -l " data/local/lang_tmp data/lang + + local/format_lms.sh --src-dir data/lang data/local/lm + + utils/build_const_arpa_lm.sh \ + data/local/lm/zeroth.lm.tg.arpa.gz data/lang data/lang_test_tglarge + utils/build_const_arpa_lm.sh \ + data/local/lm/zeroth.lm.fg.arpa.gz data/lang data/lang_test_fglarge + + if $decode; then + utils/mkgraph.sh data/lang_test_tgsmall exp/tri3 exp/tri3/graph_tgsmall + nspk=$(wc -l Date: Mon, 9 Jul 2018 20:07:09 -0700 Subject: [PATCH 04/26] cmd.sh cleaninig --- egs/zeroth_korean/s5/cmd.sh | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/egs/zeroth_korean/s5/cmd.sh b/egs/zeroth_korean/s5/cmd.sh index 1687940f7d1..34031439792 100644 --- a/egs/zeroth_korean/s5/cmd.sh +++ b/egs/zeroth_korean/s5/cmd.sh @@ -10,16 +10,8 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="run.pl --mem 2G" -export decode_cmd="run.pl --mem 4G" -export mkgraph_cmd="run.pl --mem 8G" -export normalize_cmd="run.pl --mem 4G" +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" +export mkgraph_cmd="queue.pl --mem 8G" +export normalize_cmd="queue.pl --mem 4G" -hostInAtlas="ares hephaestus jupiter neptune" -if [[ ! 
-z $(echo $hostInAtlas | grep -o $(hostname -f)) ]]; then - queue_conf=conf/queue.conf - export train_cmd="queue.pl --config $queue_conf --mem 4G" - export decode_cmd="queue.pl --config $queue_conf --mem 8G" - export mkgraph_cmd="queue.pl --config $queue_conf --mem 16G" - export normalize_cmd="queue.pl --config $queue_conf --mem 4G" -fi From b00a81390368d9265de94b9bb1af2a105d99623d Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Tue, 10 Jul 2018 08:57:41 -0700 Subject: [PATCH 05/26] run.sh script fix --- egs/zeroth_korean/s5/local/format_lms.sh | 4 +++- egs/zeroth_korean/s5/run.sh | 25 ++++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/egs/zeroth_korean/s5/local/format_lms.sh b/egs/zeroth_korean/s5/local/format_lms.sh index 5947ae6b620..a9111e80eeb 100755 --- a/egs/zeroth_korean/s5/local/format_lms.sh +++ b/egs/zeroth_korean/s5/local/format_lms.sh @@ -45,7 +45,9 @@ trap "rm -r $tmpdir" EXIT mkdir -p $tmpdir -for lm_suffix in tgsmall tgmed; do +#lm_sets="tgsmall tgmed" +lm_sets="tgsmall" +for lm_suffix in ${lm_sets}; do # tglarge is prepared by a separate command, called from run.sh; we don't # want to compile G.fst for tglarge, as it takes a while. test=${src_dir}_test_${lm_suffix} diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index 033366f81b2..e410f9514d7 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh @@ -25,6 +25,7 @@ test_set="test_clean" . ./cmd.sh . ./path.sh +. utils/parse_options.sh # e.g. this parses the --stage option if supplied. # you might not want to do this for interactive shells. 
set -e @@ -92,7 +93,7 @@ if [ $stage -le 5 ]; then utils/subset_data_dir.sh data/train_clean 10000 data/train_10k fi -if [ $stage -le 5 ]; then +if [ $stage -le 6 ]; then echo "#### Monophone Training ###########" # train a monophone system & align steps/train_mono.sh --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ @@ -100,7 +101,7 @@ if [ $stage -le 5 ]; then if $decode; then utils/mkgraph.sh data/lang_nosp_test_tgsmall exp/mono exp/mono/graph_nosp_tgsmall nspk=$(wc -l Date: Wed, 11 Jul 2018 10:45:19 -0700 Subject: [PATCH 06/26] add RESULTS page with minor typo fix --- egs/zeroth_korean/s5/RESULTS | 63 +++++++++++++++++++ .../s5/local/chain/tuning/run_tdnn_1a.sh | 4 +- egs/zeroth_korean/s5/run.sh | 7 +-- 3 files changed, 69 insertions(+), 5 deletions(-) create mode 100644 egs/zeroth_korean/s5/RESULTS diff --git a/egs/zeroth_korean/s5/RESULTS b/egs/zeroth_korean/s5/RESULTS new file mode 100644 index 00000000000..d8503cfcac4 --- /dev/null +++ b/egs/zeroth_korean/s5/RESULTS @@ -0,0 +1,63 @@ +#!/bin/bash + +# this RESULTS file was obtained by Wonkyum Lee in July 2018. 
+ +for dir in exp/*; do + steps/info/gmm_dir_info.pl $dir + for x in $dir/decode*test*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done +done +exit 0 + +# monophone, trained on the 2k shortest utterances +exp/mono: nj=16 align prob=-99.85 over 2.66h [retry=0.8%, fail=0.3%] states=130 gauss=1004 +%WER 70.24 [ 6499 / 9253, 295 ins, 1399 del, 4805 sub ] exp/mono/decode_nosp_fglarge_test_clean/wer_8_0.5 +%WER 71.28 [ 6596 / 9253, 185 ins, 1721 del, 4690 sub ] exp/mono/decode_nosp_tglarge_test_clean/wer_9_1.0 +%WER 78.83 [ 7294 / 9253, 218 ins, 1752 del, 5324 sub ] exp/mono/decode_nosp_tgsmall_test_clean/wer_10_0.0 + +# first triphone build, trained on 5k utterances +exp/tri1: nj=16 align prob=-98.34 over 11.55h [retry=1.6%, fail=0.6%] states=1568 gauss=10030 tree-impr=4.07 +%WER 37.44 [ 3464 / 9253, 258 ins, 725 del, 2481 sub ] exp/tri1/decode_nosp_fglarge_test_clean/wer_15_0.5 +%WER 38.85 [ 3595 / 9253, 347 ins, 633 del, 2615 sub ] exp/tri1/decode_nosp_tglarge_test_clean/wer_15_0.0 +%WER 53.23 [ 4925 / 9253, 296 ins, 1060 del, 3569 sub ] exp/tri1/decode_nosp_tgsmall_test_clean/wer_15_0.0 + +# tri2 is an LDA+MLLT systemm, trained on 10k utterances +exp/tri2: nj=16 align prob=-49.63 over 23.00h [retry=1.7%, fail=0.8%] states=2000 gauss=15039 tree-impr=4.70 lda-sum=18.11 mllt:impr,logdet=0.99,1.39 +%WER 33.50 [ 3100 / 9253, 248 ins, 626 del, 2226 sub ] exp/tri2/decode_nosp_fglarge_test_clean/wer_16_0.5 +%WER 34.55 [ 3197 / 9253, 315 ins, 537 del, 2345 sub ] exp/tri2/decode_nosp_tglarge_test_clean/wer_16_0.0 +%WER 48.98 [ 4532 / 9253, 303 ins, 903 del, 3326 sub ] exp/tri2/decode_nosp_tgsmall_test_clean/wer_14_0.0 + +# tri3 is an LDA+MLLT+SAT system, trained on entire clean training set +exp/tri3: nj=16 align prob=-48.95 over 51.22h [retry=1.6%, fail=0.7%] states=3336 gauss=40065 fmllr-impr=2.72 over 19.18h tree-impr=7.23 +%WER 23.89 [ 2211 / 9253, 233 ins, 404 del, 1574 sub ] exp/tri3/decode_nosp_fglarge_test_clean/wer_15_0.0 +%WER 24.47 [ 
2264 / 9253, 252 ins, 385 del, 1627 sub ] exp/tri3/decode_nosp_tglarge_test_clean/wer_13_0.0 +%WER 37.81 [ 3499 / 9253, 274 ins, 671 del, 2554 sub ] exp/tri3/decode_nosp_tgsmall_test_clean/wer_13_0.0 +%WER 49.00 [ 4534 / 9253, 302 ins, 874 del, 3358 sub ] exp/tri3/decode_nosp_tgsmall_test_clean.si/wer_14_0.0 +%WER 21.68 [ 2006 / 9253, 226 ins, 346 del, 1434 sub ] exp/tri3/decode_fglarge_test_clean/wer_15_0.0 +%WER 22.59 [ 2090 / 9253, 231 ins, 372 del, 1487 sub ] exp/tri3/decode_tglarge_test_clean/wer_15_0.0 +%WER 34.83 [ 3223 / 9253, 294 ins, 605 del, 2324 sub ] exp/tri3/decode_tgsmall_test_clean/wer_12_0.0 +%WER 45.28 [ 4190 / 9253, 270 ins, 880 del, 3040 sub ] exp/tri3/decode_tgsmall_test_clean.si/wer_15_0.0 + +# tri4 is an LDA+MLLT+SAT system after estimating pronunciation probabilities +# and word-and-pronunciation-dependent silence probabilities. +exp/tri4: nj=16 align prob=-48.70 over 51.22h [retry=1.5%, fail=0.7%] states=3368 gauss=40039 fmllr-impr=0.23 over 42.91h tree-impr=7.87 +%WER 21.61 [ 2000 / 9253, 210 ins, 379 del, 1411 sub ] exp/tri4/decode_fglarge_test_clean/wer_14_0.5 +%WER 22.59 [ 2090 / 9253, 237 ins, 371 del, 1482 sub ] exp/tri4/decode_tglarge_test_clean/wer_15_0.0 +%WER 34.57 [ 3199 / 9253, 285 ins, 595 del, 2319 sub ] exp/tri4/decode_tgsmall_test_clean/wer_12_0.0 +%WER 45.82 [ 4240 / 9253, 270 ins, 833 del, 3137 sub ] exp/tri4/decode_tgsmall_test_clean.si/wer_13_0.0 + +for dir in exp/chain/tdnn*_sp; do + steps/info/chain_dir_info.pl $dir + for x in ${dir}_online/decode*test*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done +done +exit 0 + +# tdnn_1a is a kind of factorized TDNN, with skip connections. 
+exp/chain/tdnn_1a_sp: num-iters=72 nj=3..16 num-params=18.6M dim=40+100->3040 combine=-0.046->-0.045 (over 3) xent:train/valid[47,71,final]=(-0.898,-0.775,-0.766/-0.967,-0.855,-0.845) logprob:train/valid[47,71,final]=(-0.056,-0.043,-0.043/-0.069,-0.057,-0.057) +%WER 11.42 [ 1057 / 9253, 128 ins, 193 del, 736 sub ] exp/chain/tdnn_1a_sp_online/decode_fglarge_test_clean/wer_16_1.0 +%WER 19.25 [ 1781 / 9253, 188 ins, 291 del, 1302 sub ] exp/chain/tdnn_1a_sp_online/decode_tgsmall_test_clean/wer_11_0.5 + +# This chain system has TDNN+Norm-OPGRU architecture. +exp/chain/tdnn_opgru_1a_sp: num-iters=130 nj=2..12 num-params=37.9M dim=40+100->3000 combine=-0.040->-0.038 (over 6) xent:train/valid[85,129,final]=(-1.12,-0.608,-0.616/-1.21,-0.697,-0.705) logprob:train/valid[85,129,final]=(-0.062,-0.027,-0.027/-0.067,-0.030,-0.030) +%WER 9.33 [ 863 / 9253, 101 ins, 162 del, 600 sub ] exp/chain/tdnn_opgru_1a_sp_online/decode_fglarge_test_clean/wer_8_1.0 +%WER 15.13 [ 1400 / 9253, 154 ins, 217 del, 1029 sub ] exp/chain/tdnn_opgru_1a_sp_online/decode_tgsmall_test_clean/wer_9_0.0 + diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh index 36ef3f08aad..0a01d08b8f4 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh @@ -58,13 +58,15 @@ fi local/nnet3/run_ivector_common.sh --stage $stage --speed-perturb ${speed_perturb} +suffix= if [ "$speed_perturb" == "true" ]; then train_set=${train_set}_sp + suffix=_sp fi gmm_dir=exp/${gmm} lat_dir=exp/chain/${gmm}_${train_set}_lats -dir=exp/chain/tdnn${affix} +dir=exp/chain/tdnn_${affix}${suffix} train_data_dir=data/${train_set}_hires train_ivector_dir=exp/nnet3/ivectors_${train_set}_hires lores_train_data_dir=data/${train_set} diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index e410f9514d7..32f99863cc5 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh 
@@ -95,7 +95,7 @@ fi if [ $stage -le 6 ]; then echo "#### Monophone Training ###########" - # train a monophone system & align + # train a monophone system with 2k short utts steps/train_mono.sh --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ data/train_2kshort data/lang_nosp exp/mono if $decode; then @@ -118,8 +118,7 @@ if [ $stage -le 7 ]; then echo "#### Triphone Training, delta + delta-delta ###########" steps/align_si.sh --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ data/train_5k data/lang_nosp exp/mono exp/mono_ali_5k - # train a first delta + delta-delta triphone system on a subset of 5000 utterancesa - # number of maximum pdf, gaussian (under/over fitting) + # train a first delta + delta-delta triphone system on a subset of 5000 utterances steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ 2000 10000 data/train_5k data/lang_nosp exp/mono_ali_5k exp/tri1 if $decode; then @@ -257,7 +256,7 @@ fi echo "GMM trainig is Done" if $chain_train; then - ## online chain recipe using only clean data set + ## Training Chain Acoustic model using clean data set echo "#### chain training ###########" local/chain/run_tdnn.sh fi From 5c22bab9483015b628219c6770c3cd3faa08ba68 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Thu, 12 Jul 2018 09:33:32 -0700 Subject: [PATCH 07/26] run_tdnn_1a.sh fix --- .../s5/local/chain/tuning/run_tdnn_1a.sh | 125 +++++++++--------- 1 file changed, 61 insertions(+), 64 deletions(-) diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh index 0a01d08b8f4..381b13492d0 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh @@ -20,22 +20,18 @@ common_egs_dir= # LSTM/chain options train_stage=-10 xent_regularize=0.1 -max_param_change=2.0 +dropout_schedule='0,0@0.20,0.5@0.50,0' # training chunk-options -get_egs_stage=-10 -chunk_width=150,110,100 +chunk_width=140,100,160 +# we don't need 
extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 # training options -num_jobs_initial=3 -num_jobs_final=16 -num_epochs=6 -minibatch_size=128 -initial_effective_lrate=0.001 -final_effective_lrate=0.0001 +srand=0 remove_egs=true - #decode options test_online_decoding=true # if true, it will run the last decoding stage. @@ -66,7 +62,7 @@ fi gmm_dir=exp/${gmm} lat_dir=exp/chain/${gmm}_${train_set}_lats -dir=exp/chain/tdnn_${affix}${suffix} +dir=exp/chain/tdnn${affix}${suffix} train_data_dir=data/${train_set}_hires train_ivector_dir=exp/nnet3/ivectors_${train_set}_hires lores_train_data_dir=data/${train_set} @@ -142,14 +138,16 @@ fi if [ $stage -le 11 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - opts="l2-regularize=0.002" - linear_opts="orthonormal-constraint=1.0" - output_opts="l2-regularize=0.0005 bottleneck-dim=256" + tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.005" mkdir -p $dir/configs - cat < $dir/configs/network.xconfig input dim=100 name=ivector input dim=40 name=input @@ -160,34 +158,28 @@ if [ $stage -le 11 ]; then fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 $opts dim=1280 - linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) - relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 - linear-component name=tdnn3l dim=256 $linear_opts - relu-batchnorm-layer name=tdnn3 $opts dim=1280 - 
linear-component name=tdnn4l dim=256 $linear_opts input=Append(-1,0) - relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 - linear-component name=tdnn5l dim=256 $linear_opts - relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) - linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn7l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 - linear-component name=tdnn8l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 - linear-component name=tdnn10l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 - linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0) - relu-batchnorm-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn8l,tdnn6l) dim=1280 - linear-component name=prefinal-l dim=256 $linear_opts - - relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280 + relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=1024 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + 
tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component name=prefinal-l dim=192 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 output-layer name=output include-log-softmax=false dim=$num_targets $output_opts - relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280 + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts + EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi @@ -199,33 +191,38 @@ if [ $stage -le 12 ]; then /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage fi - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ --feat.online-ivector-dir=$train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.0 \ - --chain.apply-deriv-weights false \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.0 \ + --chain.apply-deriv-weights=false \ --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.max-param-change $max_param_change \ - --trainer.num-epochs $num_epochs \ - --trainer.frames-per-iter 1500000 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - 
--trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --egs.stage $get_egs_stage \ - --egs.chunk-width $chunk_width \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --cleanup.remove-egs $remove_egs \ - --use-gpu true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; + --trainer.dropout-schedule $dropout_schedule \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=10 \ + --trainer.frames-per-iter=2000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=8 \ + --trainer.optimization.initial-effective-lrate=0.0005 \ + --trainer.optimization.final-effective-lrate=0.00005 \ + --trainer.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=0 \ + --egs.chunk-right-context=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; + fi if [ $stage -le 13 ]; then From 73b9bdbb0876454d7cedd1553db863d473b3c2d3 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 13 Jul 2018 15:51:39 -0700 Subject: [PATCH 08/26] tdnn_opgru_1a change --- .../local/chain/tuning/run_tdnn_opgru_1a.sh | 41 ++++++++----------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index e0404cd3d7c..4fd92f5b346 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -20,33 +20,25 @@ common_egs_dir= # OPGRU/chain options 
train_stage=-10 get_egs_stage=-10 + xent_regularize=0.1 -label_delay=5 -max_param_change=2.0 +dropout_schedule='0,0@0.20,0.2@0.50,0' -# training chunk-options -chunk_width=150 +chunk_width=140,100,160 chunk_left_context=40 chunk_right_context=0 -frames_per_chunk= +label_delay=5 -extra_left_context=50 -extra_right_context=0 - -# training options -srand=0 -num_jobs_initial=2 -num_jobs_final=12 -num_epochs=8 -initial_effective_lrate=0.001 -final_effective_lrate=0.0001 -dropout_schedule='0,0@0.20,0.2@0.50,0' remove_egs=true #decode options test_online_decoding=true # if true, it will run the last decoding stage. +# decode options +extra_left_context=50 +frames_per_chunk= + # End configuration section. echo "$0 $@" # Print the command line for logging @@ -74,7 +66,7 @@ fi gmm_dir=exp/${gmm} lat_dir=exp/chain/${gmm}_${train_set}_lats -dir=exp/chain/tdnn_opgru_${affix}${suffix} +dir=exp/chain/tdnn_opgru${affix}${suffix} train_data_dir=data/${train_set}_hires train_ivector_dir=exp/nnet3/ivectors_${train_set}_hires lores_train_data_dir=data/${train_set} @@ -225,14 +217,14 @@ if [ $stage -le 12 ]; then --egs.chunk-left-context-initial 0 \ --egs.chunk-right-context-final 0 \ --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 2000000 \ + --trainer.num-epochs=8 \ --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ - --trainer.max-param-change $max_param_change \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ --trainer.deriv-truncate-margin 8 \ 
--cleanup.remove-egs true \ --feat-dir $train_data_dir \ @@ -275,6 +267,7 @@ if $test_online_decoding && [ $stage -le 14 ]; then for lmtype in tgsmall; do steps/online/nnet3/decode.sh \ --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial 0 \ --nj $nspk --cmd "$decode_cmd" \ $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_test_${data_affix} || exit 1 done From bc67d9f94f0575b55593feddfd86e4dd0103e8d9 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 13 Jul 2018 16:03:53 -0700 Subject: [PATCH 09/26] add README.txt --- egs/zeroth_korean/s5/README.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 egs/zeroth_korean/s5/README.txt diff --git a/egs/zeroth_korean/s5/README.txt b/egs/zeroth_korean/s5/README.txt new file mode 100644 index 00000000000..daa007362d8 --- /dev/null +++ b/egs/zeroth_korean/s5/README.txt @@ -0,0 +1,13 @@ +Zeroth-Korean kaldi example is from Zeroth Project. Zeroth project introduces free Korean speech corpus and aims to make Korean speech recognition more broadly accessible to everyone. This project was developed in collaboration between Lucas Jo(@Atlas Guide Inc.) and Wonkyum Lee(@Gridspace Inc.). + +In this example, we are using 51.6 hours transcribed Korean audio for training data (22,263 utterances, 105 people, 3000 sentences) and 1.2 hours transcribed Korean audio for testing data (457 utterances, 10 people). Besides audio and transcription, we provide pre-trained/designed language model, lexicon and morpheme-based segmenter(morfessor) + +The database can be also downloaded from openslr: +http://www.openslr.org/40 + +The database is licensed under Attribution 4.0 International (CC BY 4.0) + +This folder contains a speech recognition recipe which is based on WSJ/Librispeech example. 
+ +For more details about Zeroth project, please visit: +https://github.com/goodatlas/zeroth From 6554ff0ddb4dc11ff1b51c377ee29cf4b8576a0b Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 13 Jul 2018 16:23:37 -0700 Subject: [PATCH 10/26] compare_wer.sh script --- .../s5/local/chain/compare_wer.sh | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100755 egs/zeroth_korean/s5/local/chain/compare_wer.sh diff --git a/egs/zeroth_korean/s5/local/chain/compare_wer.sh b/egs/zeroth_korean/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..e8366bfb358 --- /dev/null +++ b/egs/zeroth_korean/s5/local/chain/compare_wer.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER test_clean (tgsmall) " + "#WER test_clean (fglarge) ") + +for n in 0 1 ; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_test_clean fglarge_test_clean) + + wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Num-params " +for x in $*; do + printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}') +done +echo From 7e82148abdc024d55ba03f6cb76f3aec9fde727d Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 13 Jul 2018 16:23:49 -0700 Subject: [PATCH 11/26] result and diagnostics added --- .../s5/local/chain/tuning/run_tdnn_1a.sh | 17 +++++++++++++++++ .../s5/local/chain/tuning/run_tdnn_opgru_1a.sh | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh index 381b13492d0..3809c1cc31c 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh @@ -2,6 +2,23 @@ set -e -o pipefail +# This recipe trains TDNN-F AM +# The training recipe is from WSJ example(egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh) + +# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp +# exp/chain/tdnn1a_sp: num-iters=174 nj=2..8 num-params=8.4M dim=40+100->3040 combine=-0.049->-0.048 (over 3) xent:train/valid[115,173,final]=(-1.23,-0.838,-0.839/-1.22,-0.863,-0.859) 
logprob:train/valid[115,173,final]=(-0.091,-0.053,-0.053/-0.087,-0.056,-0.055) + +# ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp +# System tdnn1a_sp +#WER test_clean (tgsmall) 19.11 +#WER test_clean (fglarge) 11.06 +# Final train prob -0.0527 +# Final valid prob -0.0545 +# Final train prob (xent) -0.8395 +# Final valid prob (xent) -0.8590 +# Num-params 8426432 + + # First the options that are passed through to run_ivector_common.sh # (some of which are also used in this script directly). stage=0 diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 4fd92f5b346..1ea023c4b42 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -2,6 +2,23 @@ set -e -o pipefail +# This is recipe using TDNN+Norm-OPGRU. +# The recipe is based on AMI example.(egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh) + +# steps/info/chain_dir_info.pl exp/chain/tdnn_opgru1a_sp +# exp/chain/tdnn_opgru1a_sp: num-iters=99 nj=2..12 num-params=38.0M dim=40+100->3040 combine=-0.045->-0.045 (over 1) xent:train/valid[65,98,final]=(-1.19,-0.661,-0.647/-1.21,-0.696,-0.680) logprob:train/valid[65,98,final]=(-0.080,-0.039,-0.038/-0.076,-0.039,-0.038) + +# ./local/chain/compare_wer.sh exp/chain/tdnn_opgru1a_sp +# System tdnn_opgru1a_sp +#WER test_clean (tgsmall) 15.17 +#WER test_clean (fglarge) 9.14 +# Final train prob -0.0380 +# Final valid prob -0.0378 +# Final train prob (xent) -0.6470 +# Final valid prob (xent) -0.6805 +# Num-params 37970368 + + # First the options that are passed through to run_ivector_common.sh # (some of which are also used in this script directly). 
stage=0 From 62d6b36850f65618aa873035d1c1deb5fe090f6b Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 13 Jul 2018 16:37:16 -0700 Subject: [PATCH 12/26] frames-per-chunk added on decoding script --- egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 1ea023c4b42..8fc949f24ae 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -284,6 +284,7 @@ if $test_online_decoding && [ $stage -le 14 ]; then for lmtype in tgsmall; do steps/online/nnet3/decode.sh \ --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk 140 \ --extra-left-context-initial 0 \ --nj $nspk --cmd "$decode_cmd" \ $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_test_${data_affix} || exit 1 From 8ec007981ad5778d93c7cce08cdc0cd7e085c09a Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 13 Jul 2018 16:39:48 -0700 Subject: [PATCH 13/26] chunk left right this is from egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh --- .../s5/local/chain/tuning/run_tdnn_opgru_1a.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 8fc949f24ae..097d9f4f4e9 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -42,8 +42,6 @@ xent_regularize=0.1 dropout_schedule='0,0@0.20,0.2@0.50,0' chunk_width=140,100,160 -chunk_left_context=40 -chunk_right_context=0 label_delay=5 remove_egs=true @@ -226,8 +224,8 @@ if [ $stage -le 12 ]; then --egs.dir "$common_egs_dir" \ --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width $chunk_width \ - --egs.chunk-left-context $chunk_left_context \ - 
--egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context 40 \ + --egs.chunk-right-context 0 \ --trainer.dropout-schedule $dropout_schedule \ --trainer.optimization.backstitch-training-scale 0.3 \ --trainer.optimization.backstitch-training-interval 1 \ From 3526f60871cb0017082fc2b8fdfe7519a5fa2691 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 13 Jul 2018 16:45:16 -0700 Subject: [PATCH 14/26] omit $mfccdir --- egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh index b3b60629a8c..38f9871e1f0 100755 --- a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh +++ b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh @@ -54,13 +54,12 @@ if [ $stage -le 3 ]; then # MFCC dir across multiple locations. You might want to be careful here, if you # have multiple copies of Kaldi checked out and run the same recipe, not to let # them overwrite each other. 
- mfccdir=mfcc_hires for datadir in ${trainset} ; do utils/copy_data_dir.sh data/$datadir data/${datadir}_hires steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \ - --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; - steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; done # We need to build a small system just because we need the LDA+MLLT transform From 6d19ab2765a625b30ab6c4a06b84a344126c829e Mon Sep 17 00:00:00 2001 From: Lucas Jo Date: Mon, 20 Aug 2018 15:10:31 +0000 Subject: [PATCH 15/26] removed locale dependency --- egs/zeroth_korean/s5/local/updateSegmentation.sh | 2 +- egs/zeroth_korean/s5/path.sh | 2 +- egs/zeroth_korean/s5/run.sh | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/egs/zeroth_korean/s5/local/updateSegmentation.sh b/egs/zeroth_korean/s5/local/updateSegmentation.sh index aa025765aae..e1eea821645 100755 --- a/egs/zeroth_korean/s5/local/updateSegmentation.sh +++ b/egs/zeroth_korean/s5/local/updateSegmentation.sh @@ -28,7 +28,7 @@ cp $trans $trans".old" awk '{print $1}' $trans".old" > $trans"_tmp_index" cut -d' ' -f2- $trans".old" |\ sed -E 's/\s+/ /g; s/^\s//g; s/\s$//g' |\ - morfessor -l $lmDir/zeroth_morfessor.seg -T - -o - \ + morfessor -e 'utf-8' -l $lmDir/zeroth_morfessor.seg -T - -o - \ --output-format '{analysis} ' --output-newlines \ --nosplit-re '[0-9\[\]\(\){}a-zA-Z&.,\-]+' \ | paste -d" " $trans"_tmp_index" - > $trans diff --git a/egs/zeroth_korean/s5/path.sh b/egs/zeroth_korean/s5/path.sh index 91c09618924..2d17b17a84a 100755 --- a/egs/zeroth_korean/s5/path.sh +++ b/egs/zeroth_korean/s5/path.sh @@ -3,4 +3,4 @@ export KALDI_ROOT=`pwd`/../../.. export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! 
-f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh -export LC_ALL=ko_KR.UTF-8 +export LC_ALL=C diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index 32f99863cc5..fbc584e163a 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh @@ -11,8 +11,7 @@ # # Check list before start -# 1. locale setup (see egs/zeroth_korean/s5/path.sh; you need this "export LC_ALL=ko_KR.UTF-8" ) -# 2. required software: Morfessor-2.0.1 (see tools/extras/install_morfessor.sh) +# 1. required software: Morfessor-2.0.1 (see tools/extras/install_morfessor.sh) stage=0 db_dir=./db From 11d1a07d52a1a66b1f24b7b34f1b38ca995f36d4 Mon Sep 17 00:00:00 2001 From: Lucas Jo Date: Mon, 20 Aug 2018 15:12:31 +0000 Subject: [PATCH 16/26] removed locale dependency --- egs/zeroth_korean/s5/local/updateSegmentation.sh | 2 +- egs/zeroth_korean/s5/path.sh | 2 +- egs/zeroth_korean/s5/run.sh | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/egs/zeroth_korean/s5/local/updateSegmentation.sh b/egs/zeroth_korean/s5/local/updateSegmentation.sh index aa025765aae..e1eea821645 100755 --- a/egs/zeroth_korean/s5/local/updateSegmentation.sh +++ b/egs/zeroth_korean/s5/local/updateSegmentation.sh @@ -28,7 +28,7 @@ cp $trans $trans".old" awk '{print $1}' $trans".old" > $trans"_tmp_index" cut -d' ' -f2- $trans".old" |\ sed -E 's/\s+/ /g; s/^\s//g; s/\s$//g' |\ - morfessor -l $lmDir/zeroth_morfessor.seg -T - -o - \ + morfessor -e 'utf-8' -l $lmDir/zeroth_morfessor.seg -T - -o - \ --output-format '{analysis} ' --output-newlines \ --nosplit-re '[0-9\[\]\(\){}a-zA-Z&.,\-]+' \ | paste -d" " $trans"_tmp_index" - > $trans diff --git a/egs/zeroth_korean/s5/path.sh b/egs/zeroth_korean/s5/path.sh index 91c09618924..2d17b17a84a 100755 --- a/egs/zeroth_korean/s5/path.sh +++ b/egs/zeroth_korean/s5/path.sh @@ -3,4 +3,4 @@ export 
KALDI_ROOT=`pwd`/../../.. export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh -export LC_ALL=ko_KR.UTF-8 +export LC_ALL=C diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index 32f99863cc5..fbc584e163a 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh @@ -11,8 +11,7 @@ # # Check list before start -# 1. locale setup (see egs/zeroth_korean/s5/path.sh; you need this "export LC_ALL=ko_KR.UTF-8" ) -# 2. required software: Morfessor-2.0.1 (see tools/extras/install_morfessor.sh) +# 1. required software: Morfessor-2.0.1 (see tools/extras/install_morfessor.sh) stage=0 db_dir=./db From c75b1f83bd82687c4ed0d9eef4e4ce8471d53c38 Mon Sep 17 00:00:00 2001 From: Lucas Jo Date: Mon, 20 Aug 2018 15:28:28 +0000 Subject: [PATCH 17/26] changed filename --- .../s5/local/{updateSegmentation.sh => update_segmentation.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename egs/zeroth_korean/s5/local/{updateSegmentation.sh => update_segmentation.sh} (100%) diff --git a/egs/zeroth_korean/s5/local/updateSegmentation.sh b/egs/zeroth_korean/s5/local/update_segmentation.sh similarity index 100% rename from egs/zeroth_korean/s5/local/updateSegmentation.sh rename to egs/zeroth_korean/s5/local/update_segmentation.sh From d1b227779849b6563dfc95a73b500ee280b93248 Mon Sep 17 00:00:00 2001 From: Lucas Jo Date: Mon, 20 Aug 2018 15:42:33 +0000 Subject: [PATCH 18/26] re-indented with no tab --- egs/zeroth_korean/s5/local/prepare_dict.sh | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/egs/zeroth_korean/s5/local/prepare_dict.sh b/egs/zeroth_korean/s5/local/prepare_dict.sh index a4038ed7f43..76c6821e11e 100755 --- a/egs/zeroth_korean/s5/local/prepare_dict.sh +++ b/egs/zeroth_korean/s5/local/prepare_dict.sh 
@@ -7,9 +7,9 @@ # Prepare dictionary if [ $# -ne 2 ]; then - echo "Usage: $0 " - echo "e.g.: /data/local/lm data/local/dict_nosp" - exit 1 + echo "Usage: $0 " + echo "e.g.: /data/local/lm data/local/dict_nosp" + exit 1 fi lm_dir=$1 dst_dir=$2 @@ -20,7 +20,7 @@ mkdir -p $dst_dir || exit 1; lexicon_raw_nosil=$dst_dir/lexicon_raw_nosil.txt if [[ ! -s "$lexicon_raw_nosil" ]]; then - cp $lm_dir/zeroth_lexicon $lexicon_raw_nosil || exit 1 + cp $lm_dir/zeroth_lexicon $lexicon_raw_nosil || exit 1 fi silence_phones=$dst_dir/silence_phones.txt @@ -35,31 +35,31 @@ echo SIL > $optional_silence # nonsilence phones; on each line is a list of phones that correspond # really to the same base phone. awk '{for (i=2; i<=NF; ++i) { print $i; gsub(/[0-9]/, "", $i); print $i}}' $lexicon_raw_nosil |\ - sort -u |\ - perl -e 'while(<>){ -chop; m:^([^\d]+)(\d*)$: || die "Bad phone $_"; -$phones_of{$1} .= "$_ "; } -foreach $list (values %phones_of) {print $list . "\n"; } ' \ - > $nonsil_phones || exit 1; + sort -u |\ + perl -e 'while(<>){ + chop; m:^([^\d]+)(\d*)$: || die "Bad phone $_"; + $phones_of{$1} .= "$_ "; } + foreach $list (values %phones_of) {print $list . "\n"; } ' \ + > $nonsil_phones || exit 1; # A few extra questions that will be added to those obtained by # automatically clustering # the "real" phones. These ask about stress; there's also one for # silence. 
cat $silence_phones| awk '{printf("%s ", $1);} END{printf "\n";}' > $extra_questions || exit 1; cat $nonsil_phones | perl -e 'while(<>){ foreach $p (split(" ", $_)){ - $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \ - >> $extra_questions || exit 1; +$p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \ + >> $extra_questions || exit 1; echo "$(wc -l <$silence_phones) silence phones saved to: $silence_phones" echo "$(wc -l <$optional_silence) optional silence saved to: $optional_silence" echo "$(wc -l <$nonsil_phones) non-silence phones saved to: $nonsil_phones" -echo "$(wc -l <$extra_questions) extra triphone clustering-related questions saved to: $extra_questions" +echo "$(wc -l <$extra_questions) extra triphone clustering-related questions saved to: $extra_questions" #(echo '!SIL SIL'; echo '[BREATH] BRH'; echo '[NOISE] NSN'; echo '[COUGH] CGH'; # echo '[SMACK] SMK'; echo '[UM] UM'; echo '[UH] UHH' # echo ' NSN' ) | \ (echo '!SIL SIL'; echo ' SPN'; echo ' SPN'; ) |\ -cat - $lexicon_raw_nosil | sort | uniq >$dst_dir/lexicon.txt + cat - $lexicon_raw_nosil | sort | uniq >$dst_dir/lexicon.txt echo "Lexicon text file saved as: $dst_dir/lexicon.txt" exit 0 From 17378f2af6d1be0b6b17edb7a851a428fbd62ea7 Mon Sep 17 00:00:00 2001 From: Lucas Jo Date: Mon, 20 Aug 2018 15:57:46 +0000 Subject: [PATCH 19/26] changed to use PCA instead of LDA+MLLT --- egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh index 38f9871e1f0..ea186be7b90 100755 --- a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh +++ b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh @@ -76,19 +76,17 @@ if [ $stage -le 4 ]; then mkdir exp -p exp/nnet3 - steps/train_lda_mllt.sh --cmd "$train_cmd" 
--num-iters 13 \ - --realign-iters "" \ + steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \ --splice-opts "--left-context=3 --right-context=3" \ - 3000 10000 data/${trainset}_hires data/lang_nosp \ - ${gmmdir}_ali_${trainset} exp/nnet3/tri2 + --max-utts 30000 --subsample 2 \ + data/${trainset}_hires exp/nnet3/pca_transform fi - if [ $stage -le 5 ]; then # To train a diagonal UBM we don't need very much data, so use a small subset # (actually, it's not that small: still around 100 hours). steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 700000 \ - data/train_30k_hires 512 exp/nnet3/tri2 exp/nnet3/diag_ubm + data/train_30k_hires 512 exp/nnet3/pca_transform exp/nnet3/diag_ubm fi if [ $stage -le 6 ]; then From 90400dd5d54ff73accfd3d99668ff48ef79e4140 Mon Sep 17 00:00:00 2001 From: Lucas Jo Date: Mon, 20 Aug 2018 16:02:59 +0000 Subject: [PATCH 20/26] added -bash on echo statements --- egs/zeroth_korean/s5/run.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index fbc584e163a..a049a2c2597 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh @@ -93,7 +93,7 @@ if [ $stage -le 5 ]; then fi if [ $stage -le 6 ]; then - echo "#### Monophone Training ###########" + echo "$0: #### Monophone Training ###########" # train a monophone system with 2k short utts steps/train_mono.sh --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ data/train_2kshort data/lang_nosp exp/mono @@ -114,7 +114,7 @@ if [ $stage -le 6 ]; then fi if [ $stage -le 7 ]; then - echo "#### Triphone Training, delta + delta-delta ###########" + echo "$0: #### Triphone Training, delta + delta-delta ###########" steps/align_si.sh --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ data/train_5k data/lang_nosp exp/mono exp/mono_ali_5k # train a first delta + delta-delta triphone system on a subset of 5000 utterances @@ -137,7 +137,7 @@ if [ $stage -le 7 ]; then fi 
if [ $stage -le 8 ]; then - echo "#### Triphone Training, LDA+MLLT ###########" + echo "$0: #### Triphone Training, LDA+MLLT ###########" steps/align_si.sh --nj $nj --cmd "$train_cmd" \ data/train_10k data/lang_nosp exp/tri1 exp/tri1_ali_10k # train an LDA+MLLT system. @@ -162,7 +162,7 @@ fi if [ $stage -le 9 ]; then - echo "#### Triphone Training, LDA+MLLT+SAT ###########" + echo "$0: #### Triphone Training, LDA+MLLT+SAT ###########" # Align the entire train_clean using the tri2 model steps/align_si.sh --nj $nj --cmd "$train_cmd" --use-graphs true \ data/train_clean data/lang_nosp exp/tri2 exp/tri2_ali_train_clean @@ -187,7 +187,7 @@ if [ $stage -le 9 ]; then fi if [ $stage -le 10 ]; then - echo "#### Re-computing pronunciation model using tri3 model ###########" + echo "$0: #### Re-computing pronunciation model using tri3 model ###########" # Now we compute the pronunciation and silence probabilities from training data, # and re-create the lang directory. # silence transition probability ... 
@@ -227,7 +227,7 @@ fi if [ $stage -le 11 ]; then - echo "#### SAT again on train_clean ###########" + echo "$0: #### SAT again on train_clean ###########" # align the entire train_clean using the tri3 model steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ data/train_clean data/lang exp/tri3 exp/tri3_ali_train_clean @@ -252,11 +252,11 @@ if [ $stage -le 11 ]; then fi fi -echo "GMM trainig is Done" +echo "$0: GMM training is Done" if $chain_train; then ## Training Chain Acoustic model using clean data set - echo "#### chain training ###########" + echo "$0: #### chain training ###########" local/chain/run_tdnn.sh fi From 6d010aab37c0771dfacf5c32a5ba96fbd0448264 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Mon, 27 Aug 2018 14:43:59 -0700 Subject: [PATCH 21/26] fix pointing update_segmentation.sh in run.sh --- egs/zeroth_korean/s5/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index a049a2c2597..58db7a93ad9 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh @@ -45,7 +45,7 @@ fi if [ $stage -le 2 ]; then # update segmentation of transcripts for part in train_data_01 test_data_01; do - local/updateSegmentation.sh data/$part data/local/lm + local/update_segmentation.sh data/$part data/local/lm done fi From bd8094f32e1c051480968477e785aa3c340dfa12 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Mon, 27 Aug 2018 14:45:12 -0700 Subject: [PATCH 22/26] simplified and added echo statement --- .../s5/local/nnet3/run_ivector_common.sh | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh index ea186be7b90..7bde9a1ad9b 100755 --- a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh +++ b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh @@ -17,30 +17,22 @@ set -e if [ "$speed_perturb" == "true" ]; then if [ $stage
-le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment # _sp stands for speed-perturbed - for datadir in ${trainset} ; do - utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1 - utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2 - utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2 - utils/validate_data_dir.sh --no-feats data/${datadir}_tmp - rm -r data/temp1 data/temp2 + utils/data/perturb_data_dir_speed_3way.sh data/${datadir} data/${datadir}_sp mfccdir=mfcc_perturbed steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 \ - data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1; - steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1; - utils/fix_data_dir.sh data/${datadir}_tmp - - utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0 - utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0 + data/${datadir}_sp exp/make_mfcc/${datadir}_sp $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_sp exp/make_mfcc/${datadir}_sp $mfccdir || exit 1; utils/fix_data_dir.sh data/${datadir}_sp - rm -r data/temp0 data/${datadir}_tmp done fi if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" #obtain the alignment of the perturbed data steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ data/${trainset}_sp data/lang_nosp ${gmmdir} ${gmmdir}_ali_${trainset}_sp || exit 1 @@ -55,6 +47,7 @@ if [ $stage -le 3 ]; then # have multiple copies of Kaldi checked out and run the same recipe, not to let # them overwrite each other. 
+ echo "$0: creating high-resolution MFCC features" for datadir in ${trainset} ; do utils/copy_data_dir.sh data/$datadir data/${datadir}_hires steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \ @@ -74,8 +67,8 @@ if [ $stage -le 4 ]; then # because after we get the transform (12th iter is the last), any further # training is pointless. + echo "$0: computing a PCA transform from the hires data." mkdir exp -p exp/nnet3 - steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \ --splice-opts "--left-context=3 --right-context=3" \ --max-utts 30000 --subsample 2 \ @@ -84,12 +77,15 @@ fi if [ $stage -le 5 ]; then # To train a diagonal UBM we don't need very much data, so use a small subset - # (actually, it's not that small: still around 100 hours). + echo "$0: computing a PCA transform from the hires data." steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 700000 \ data/train_30k_hires 512 exp/nnet3/pca_transform exp/nnet3/diag_ubm fi if [ $stage -le 6 ]; then + # Train the iVector extractor. Use all of the speed-perturbed data since iVector extractors + # can be sensitive to the amount of data. The script defaults to an iVector dimension of 100 + echo "$0: training the iVector extractor" steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ data/${trainset}_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1; fi @@ -104,6 +100,7 @@ if [ $stage -le 7 ]; then # having a larger number of speakers is helpful for generalization, and to # handle per-utterance decoding well (iVector starts at zero). 
+ echo "$0: extracting iVector using trained iVector extractor" utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ data/${trainset}_hires data/${trainset}_hires_max2 From 7b55b5f5a79ac2080f66727ecb2794f31d1ca4c9 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Tue, 28 Aug 2018 09:45:17 -0700 Subject: [PATCH 23/26] results updated --- egs/zeroth_korean/s5/RESULTS | 13 +++++++------ .../s5/local/chain/tuning/run_tdnn_1a.sh | 11 +++++------ .../s5/local/chain/tuning/run_tdnn_opgru_1a.sh | 12 ++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/egs/zeroth_korean/s5/RESULTS b/egs/zeroth_korean/s5/RESULTS index d8503cfcac4..976157fa584 100644 --- a/egs/zeroth_korean/s5/RESULTS +++ b/egs/zeroth_korean/s5/RESULTS @@ -52,12 +52,13 @@ done exit 0 # tdnn_1a is a kind of factorized TDNN, with skip connections. -exp/chain/tdnn_1a_sp: num-iters=72 nj=3..16 num-params=18.6M dim=40+100->3040 combine=-0.046->-0.045 (over 3) xent:train/valid[47,71,final]=(-0.898,-0.775,-0.766/-0.967,-0.855,-0.845) logprob:train/valid[47,71,final]=(-0.056,-0.043,-0.043/-0.069,-0.057,-0.057) -%WER 11.42 [ 1057 / 9253, 128 ins, 193 del, 736 sub ] exp/chain/tdnn_1a_sp_online/decode_fglarge_test_clean/wer_16_1.0 -%WER 19.25 [ 1781 / 9253, 188 ins, 291 del, 1302 sub ] exp/chain/tdnn_1a_sp_online/decode_tgsmall_test_clean/wer_11_0.5 +exp/chain/tdnn1a_sp: num-iters=174 nj=2..8 num-params=8.4M dim=40+100->3040 combine=-0.049->-0.048 (over 3) xent:train/valid[115,173,final]=(-1.21,-0.841,-0.837/-1.20,-0.856,-0.853) logprob:train/valid[115,173,final]=(-0.091,-0.053,-0.053/-0.084,-0.055,-0.054) +%WER 11.08 [ 1025 / 9253, 155 ins, 155 del, 715 sub ] exp/chain/tdnn1a_sp_online/decode_fglarge_test_clean/wer_11_0.0 +%WER 18.93 [ 1752 / 9253, 209 ins, 273 del, 1270 sub ] exp/chain/tdnn1a_sp_online/decode_tgsmall_test_clean/wer_11_0.0 + # This chain system has TDNN+Norm-OPGRU architecture.
-exp/chain/tdnn_opgru_1a_sp: num-iters=130 nj=2..12 num-params=37.9M dim=40+100->3000 combine=-0.040->-0.038 (over 6) xent:train/valid[85,129,final]=(-1.12,-0.608,-0.616/-1.21,-0.697,-0.705) logprob:train/valid[85,129,final]=(-0.062,-0.027,-0.027/-0.067,-0.030,-0.030) -%WER 9.33 [ 863 / 9253, 101 ins, 162 del, 600 sub ] exp/chain/tdnn_opgru_1a_sp_online/decode_fglarge_test_clean/wer_8_1.0 -%WER 15.13 [ 1400 / 9253, 154 ins, 217 del, 1029 sub ] exp/chain/tdnn_opgru_1a_sp_online/decode_tgsmall_test_clean/wer_9_0.0 +exp/chain/tdnn_opgru1a_sp: num-iters=99 nj=2..12 num-params=38.0M dim=40+100->3040 combine=-0.045->-0.045 (over 1) xent:train/valid[65,98,final]=(-1.18,-0.663,-0.651/-1.21,-0.698,-0.684) logprob:train/valid[65,98,final]=(-0.079,-0.038,-0.037/-0.076,-0.040,-0.039) +%WER 9.45 [ 874 / 9253, 109 ins, 159 del, 606 sub ] exp/chain/tdnn_opgru1a_sp_online/decode_fglarge_test_clean/wer_10_1.0 +%WER 15.22 [ 1408 / 9253, 175 ins, 196 del, 1037 sub ] exp/chain/tdnn_opgru1a_sp_online/decode_tgsmall_test_clean/wer_8_0.0 diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh index 3809c1cc31c..20ffd6630c4 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh @@ -10,15 +10,14 @@ set -e -o pipefail # ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp # System tdnn1a_sp -#WER test_clean (tgsmall) 19.11 -#WER test_clean (fglarge) 11.06 +#WER test_clean (tgsmall) 18.93 +#WER test_clean (fglarge) 11.08 # Final train prob -0.0527 -# Final valid prob -0.0545 -# Final train prob (xent) -0.8395 -# Final valid prob (xent) -0.8590 +# Final valid prob -0.0541 +# Final train prob (xent) -0.8366 +# Final valid prob (xent) -0.8532 # Num-params 8426432 - # First the options that are passed through to run_ivector_common.sh # (some of which are also used in this script directly). 
stage=0 diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 097d9f4f4e9..44110888519 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -10,12 +10,12 @@ set -e -o pipefail # ./local/chain/compare_wer.sh exp/chain/tdnn_opgru1a_sp # System tdnn_opgru1a_sp -#WER test_clean (tgsmall) 15.17 -#WER test_clean (fglarge) 9.14 -# Final train prob -0.0380 -# Final valid prob -0.0378 -# Final train prob (xent) -0.6470 -# Final valid prob (xent) -0.6805 +#WER test_clean (tgsmall) 15.22 +#WER test_clean (fglarge) 9.45 +# Final train prob -0.0373 +# Final valid prob -0.0386 +# Final train prob (xent) -0.6506 +# Final valid prob (xent) -0.6837 # Num-params 37970368 From cb817af6d013a271f0300f4bfb75d1666963fe3b Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Thu, 30 Aug 2018 22:52:19 -0700 Subject: [PATCH 24/26] data prep interface change --- egs/zeroth_korean/s5/local/data_prep.sh | 11 +++++++---- egs/zeroth_korean/s5/run.sh | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/egs/zeroth_korean/s5/local/data_prep.sh b/egs/zeroth_korean/s5/local/data_prep.sh index 5e6a7d02ce6..4fbb727f1cb 100755 --- a/egs/zeroth_korean/s5/local/data_prep.sh +++ b/egs/zeroth_korean/s5/local/data_prep.sh @@ -7,13 +7,16 @@ # Modified by Lucas Jo 2017 (Altas Guide) if [ "$#" -ne 2 ]; then - echo "Usage: $0 " + echo "Usage: $0 " echo "e.g.: $0 ./db/train_data_01 data/train_data_01" exit 1 fi -src=$1 -dst=$2 +db_dir=$1 +data_part=$2 + +src=${db_dir}/${data_part} +dst=data/${data_part} # all utterances are FLAC compressed if ! which flac >&/dev/null; then @@ -21,7 +24,7 @@ if ! 
which flac >&/dev/null; then exit 1 fi -spk_file=$src/../AUDIO_INFO +spk_file=${db_dir}/AUDIO_INFO mkdir -p $dst || exit 1; diff --git a/egs/zeroth_korean/s5/run.sh b/egs/zeroth_korean/s5/run.sh index 58db7a93ad9..c5c7506980b 100755 --- a/egs/zeroth_korean/s5/run.sh +++ b/egs/zeroth_korean/s5/run.sh @@ -38,7 +38,7 @@ if [ $stage -le 1 ]; then # format the data as Kaldi data directories for part in train_data_01 test_data_01; do # use underscore-separated names in data directories. - local/data_prep.sh $db_dir/$part data/$part + local/data_prep.sh $db_dir $part done fi From 6259aed7afa33fde2fb1d820f411681b7aa32017 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Thu, 30 Aug 2018 23:01:44 -0700 Subject: [PATCH 25/26] cosmetic fix for ivector script --- .../s5/local/nnet3/run_ivector_common.sh | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh index 7bde9a1ad9b..70be96310e1 100755 --- a/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh +++ b/egs/zeroth_korean/s5/local/nnet3/run_ivector_common.sh @@ -55,29 +55,25 @@ if [ $stage -le 3 ]; then steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; done - # We need to build a small system just because we need the LDA+MLLT transform - # to train the diag-UBM on top of. We align a subset of training data for - # this purpose. + # We need to build a small system just because we need PCA transform + # to train the diag-UBM on top of. utils/subset_data_dir.sh data/${trainset}_hires 30000 data/train_30k_hires fi if [ $stage -le 4 ]; then - # Train a small system just for its LDA+MLLT transform. We use --num-iters 13 - # because after we get the transform (12th iter is the last), any further - # training is pointless. - + # Train a small system just for its PCA transform. echo "$0: computing a PCA transform from the hires data." 
mkdir exp -p exp/nnet3 steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \ --splice-opts "--left-context=3 --right-context=3" \ --max-utts 30000 --subsample 2 \ - data/${trainset}_hires exp/nnet3/pca_transform + data/train_30k_hires exp/nnet3/pca_transform fi if [ $stage -le 5 ]; then # To train a diagonal UBM we don't need very much data, so use a small subset - echo "$0: computing a PCA transform from the hires data." + echo "$0: training the diagonal UBM." steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 700000 \ data/train_30k_hires 512 exp/nnet3/pca_transform exp/nnet3/diag_ubm fi From 7e14701aeb3ca9814d3e72124766468ed2ddfd89 Mon Sep 17 00:00:00 2001 From: Wonkyum Lee Date: Fri, 31 Aug 2018 09:10:25 -0700 Subject: [PATCH 26/26] increase parameter for TDNN-F --- egs/zeroth_korean/s5/RESULTS | 7 ++- .../s5/local/chain/tuning/run_tdnn_1a.sh | 54 +++++++++---------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/egs/zeroth_korean/s5/RESULTS b/egs/zeroth_korean/s5/RESULTS index 976157fa584..9255ec17673 100644 --- a/egs/zeroth_korean/s5/RESULTS +++ b/egs/zeroth_korean/s5/RESULTS @@ -52,10 +52,9 @@ done exit 0 # tdnn_1a is a kind of factorized TDNN, with skip connections. 
-exp/chain/tdnn1a_sp: num-iters=174 nj=2..8 num-params=8.4M dim=40+100->3040 combine=-0.049->-0.048 (over 3) xent:train/valid[115,173,final]=(-1.21,-0.841,-0.837/-1.20,-0.856,-0.853) logprob:train/valid[115,173,final]=(-0.091,-0.053,-0.053/-0.084,-0.055,-0.054) -%WER 11.08 [ 1025 / 9253, 155 ins, 155 del, 715 sub ] exp/chain/tdnn1a_sp_online/decode_fglarge_test_clean/wer_11_0.0 -%WER 18.93 [ 1752 / 9253, 209 ins, 273 del, 1270 sub ] exp/chain/tdnn1a_sp_online/decode_tgsmall_test_clean/wer_11_0.0 - +exp/chain/tdnn1b_sp: num-iters=174 nj=2..8 num-params=12.9M dim=40+100->3040 combine=-0.041->-0.041 (over 2) xent:train/valid[115,173,final]=(-1.14,-0.759,-0.751/-1.14,-0.788,-0.777) logprob:train/valid[115,173,final]=(-0.084,-0.047,-0.046/-0.080,-0.050,-0.048) +%WER 10.55 [ 976 / 9253, 122 ins, 166 del, 688 sub ] exp/chain/tdnn1b_sp_online/decode_fglarge_test_clean/wer_13_1.0 +%WER 17.65 [ 1633 / 9253, 208 ins, 233 del, 1192 sub ] exp/chain/tdnn1b_sp_online/decode_tgsmall_test_clean/wer_10_0.0 # This chain system has TDNN+Norm-OPGRU architecture. 
exp/chain/tdnn_opgru1a_sp: num-iters=99 nj=2..12 num-params=38.0M dim=40+100->3040 combine=-0.045->-0.045 (over 1) xent:train/valid[65,98,final]=(-1.18,-0.663,-0.651/-1.21,-0.698,-0.684) logprob:train/valid[65,98,final]=(-0.079,-0.038,-0.037/-0.076,-0.040,-0.039) diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh index 20ffd6630c4..55e046dd55a 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh @@ -6,17 +6,17 @@ set -e -o pipefail # The training recipe is from WSJ example(egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh) # steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp -# exp/chain/tdnn1a_sp: num-iters=174 nj=2..8 num-params=8.4M dim=40+100->3040 combine=-0.049->-0.048 (over 3) xent:train/valid[115,173,final]=(-1.23,-0.838,-0.839/-1.22,-0.863,-0.859) logprob:train/valid[115,173,final]=(-0.091,-0.053,-0.053/-0.087,-0.056,-0.055) +# exp/chain/tdnn1b_sp: num-iters=174 nj=2..8 num-params=12.9M dim=40+100->3040 combine=-0.041->-0.041 (over 2) xent:train/valid[115,173,final]=(-1.14,-0.759,-0.751/-1.14,-0.788,-0.777) logprob:train/valid[115,173,final]=(-0.084,-0.047,-0.046/-0.080,-0.050,-0.048) # ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp -# System tdnn1a_sp -#WER test_clean (tgsmall) 18.93 -#WER test_clean (fglarge) 11.08 -# Final train prob -0.0527 -# Final valid prob -0.0541 -# Final train prob (xent) -0.8366 -# Final valid prob (xent) -0.8532 -# Num-params 8426432 +# System tdnn1b_sp +#WER test_clean (tgsmall) 17.65 +#WER test_clean (fglarge) 10.55 +# Final train prob -0.0460 +# Final valid prob -0.0480 +# Final train prob (xent) -0.7512 +# Final valid prob (xent) -0.7769 +# Num-params 12922560 # First the options that are passed through to run_ivector_common.sh # (some of which are also used in this script directly). 
@@ -174,26 +174,26 @@ if [ $stage -le 11 ]; then fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=1024 - tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 - tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 - tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 - linear-component name=prefinal-l dim=192 $linear_opts - - - prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=1280 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf8 
$tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1280 bottleneck-dim=160 time-stride=3 + linear-component name=prefinal-l dim=256 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1280 small-dim=256 output-layer name=output include-log-softmax=false dim=$num_targets $output_opts - prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1280 small-dim=256 output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts EOF