From df0913327878801b1b8e1c3301ec8becc5d82a9d Mon Sep 17 00:00:00 2001
From: Nagendra Kumar Goel <kumar@a18.clsp.jhu.edu>
Date: Tue, 9 Jan 2018 17:05:01 -0500
Subject: [PATCH 01/11] SWBD stats pooling VAD recipe

---
 .../s5c/local/run_cleanup_segmentation.sh     | 56 ++++++++++++++
 .../local/segmentation/copy_targets_dir.sh    | 76 +++++++++++++++++++
 2 files changed, 132 insertions(+)
 create mode 100755 egs/swbd/s5c/local/run_cleanup_segmentation.sh
 create mode 100755 egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
diff --git a/egs/swbd/s5c/local/run_cleanup_segmentation.sh b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
new file mode 100755
index 00000000000..d08d3f0e0b4
--- /dev/null
+++ b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright 2016  Vimal Manohar
+#           2016  Johns Hopkins University (author: Daniel Povey)
+# Apache 2.0
+
+# This script demonstrates how to re-segment training data selecting only the
+# "good" audio that matches the transcripts.
+# The basic idea is to decode with an existing in-domain acoustic model, and a
+# biased language model built from the reference, and then work out the
+# segmentation from a ctm like file.
+
+# For nnet3 and chain results after cleanup, see the scripts in
+# local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh
+
+# GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets
+# [will add these later].
+
+set -e
+set -o pipefail
+set -u
+
+stage=0
+cleanup_stage=0
+data=data/train
+cleanup_affix=cleaned
+srcdir=exp/tri4_mmi_b0.1
+langdir=data/lang_sw1_tg
+nj=100
+decode_nj=16
+decode_num_threads=4
+
+. ./path.sh
+. ./cmd.sh
+. utils/parse_options.sh
+
+cleaned_data=${data}_${cleanup_affix}
+
+dir=${srcdir}_${cleanup_affix}_work
+cleaned_dir=${srcdir}_${cleanup_affix}
+
+if [ $stage -le 1 ]; then
+  # This does the actual data cleanup.
+  steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \
+    $data $langdir $srcdir $dir $cleaned_data
+fi
+# Stage 2: run steps/align_fmllr.sh on $cleaned_data with $srcdir; output goes to ${srcdir}_ali_${cleanup_affix}.
+if [ $stage -le 2 ]; then
+  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
+    $cleaned_data $langdir $srcdir ${srcdir}_ali_${cleanup_affix}
+fi
+# Stage 3: run steps/train_sat.sh on $cleaned_data using the stage-2 alignments; output goes to $cleaned_dir.
+if [ $stage -le 3 ]; then
+  steps/train_sat.sh --cmd "$train_cmd" \
+    5000 100000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir}
+fi
diff --git a/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh b/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
new file mode 100755
index 00000000000..8be70b4715a
--- /dev/null
+++ b/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Copyright 2014  Johns Hopkins University (author: Nagendra K Goel)
+# Apache 2.0
+
+# This script operates on a directory, such as in exp/segmentation_1a/train_whole_combined_targets_rev1,
+# that contains some subset of the following files:
+# targets.X.ark
+# frame_subsampling_factor
+# It copies to another directory, possibly adding a specified prefix or a suffix
+# to the utterance names.
+# One job per targets.X.ark archive is launched through the command given by --cmd.
+
+# begin configuration section
+utt_prefix=
+utt_suffix=
+cmd=run.pl
+# end configuration section
+
+. utils/parse_options.sh
+
+if [ $# != 2 ]; then
+  echo "Usage: "
+  echo "  $0 [options] <src_dir> <dest_dir>"
+  echo "e.g.:"
+  echo " $0  --utt-prefix=1- exp/segmentation_1a/train_whole_combined_targets_sub3 exp/segmentation_1a/train_whole_combined_targets_sub3_rev1"
+  echo "Options"
+  echo "   --utt-prefix=<prefix>     # Prefix for utterance ids, default empty"
+  echo "   --utt-suffix=<suffix>     # Suffix for utterance ids, default empty"
+  echo "   --cmd=<cmd>               # Command used to launch the copy jobs, default run.pl"
+  exit 1;
+fi
+
+export LC_ALL=C
+
+src_dir=$1
+dest_dir=$2
+
+mkdir -p $dest_dir
+
+if [ ! -f $src_dir/targets.1.ark ]; then
+  echo "copy_targets_dir.sh: no such files $src_dir/targets.1.ark"
+  exit 1;
+fi
+
+for f in frame_subsampling_factor; do
+  if [ ! -f $src_dir/$f ]; then
+    echo "$0: no such file $src_dir/$f this might be serious error."
+    continue
+  fi
+  cp $src_dir/$f $dest_dir/
+done
+
+nj=$(ls $src_dir/targets.*.ark | wc -l)
+mkdir -p $dest_dir/temp
+cat << EOF > $dest_dir/temp/copy_targets.sh
+#!/bin/bash
+set -e -o pipefail  # a failure in any stage of the pipeline below must fail the job
+id=\$1
+echo "$src_dir/targets.\$id.ark"
+copy-matrix ark:$src_dir/targets.\$id.ark ark,t:- | \
+python -c "
+import sys
+for line in sys.stdin:
+      parts = line.split()
+      if \"[\" not in line:
+            print(line.rstrip())  # single-argument print() works in Python 2 and 3
+      else:
+            print('$utt_prefix{0}$utt_suffix {1}'.format(parts[0], ' '.join(parts[1:])))
+" | \
+  copy-matrix ark,t:- ark:$dest_dir/targets.\$id.ark || exit 1;
+EOF
+chmod +x $dest_dir/temp/copy_targets.sh
+$cmd -v PATH JOB=1:$nj $dest_dir/temp/copy_targets.JOB.log $dest_dir/temp/copy_targets.sh JOB || exit 1;
+
+echo "$0: copied targets from $src_dir to $dest_dir"

From b9c7161fbaf6b37907a47e3dfa510c28b5c4abdd Mon Sep 17 00:00:00 2001
From: Nagendra Kumar Goel <nagendra.goel@govivace.com>
Date: Tue, 9 Jan 2018 17:14:38 -0500
Subject: [PATCH 02/11] Add SWBD VAD recipe

---
 egs/swbd/s5c/local/run_asr_segmentation.sh    | 83 ++++++++++++++-----
 .../s5c/local/run_cleanup_segmentation.sh     |  3 +-
 .../local/segmentation/copy_targets_dir.sh    |  3 +-
 .../tuning/train_lstm_asr_sad_1a.sh           |  7 +-
 .../tuning/train_stats_asr_sad_1a.sh          |  9 +-
 5 files changed, 77 insertions(+), 28 deletions(-)

diff --git a/egs/swbd/s5c/local/run_asr_segmentation.sh b/egs/swbd/s5c/local/run_asr_segmentation.sh
index 32b2e3a8411..d87703d1e90 100755
--- a/egs/swbd/s5c/local/run_asr_segmentation.sh
+++ b/egs/swbd/s5c/local/run_asr_segmentation.sh
@@ -1,15 +1,17 @@
-#! /bin/bash
+#!/bin/bash
 
-# Copyright 2017  Vimal Manohar
+# Copyright  2017  Nagendra Kumar Goel
+#            2017  Vimal Manohar
 # Apache 2.0
 
-# Features configs (Must match the features used to train the models
-# $sat_model_dir and $model_dir)
+# We assume the run-1-main.sh (because we are using model directories like
+# exp/tri4) and later we assumme run-4-anydecode.sh was run to prepare
+# data/dev10h.pem
 
-lang=data/lang_nosp   # Must match the one used to train the models
+lang=data/lang   # Must match the one used to train the models
 lang_test=data/lang_nosp_sw1_tg  # Lang directory for decoding.
 
-data_dir=data/train_100k_nodup
+data_dir=data/train 
 # Model directory used to align the $data_dir to get target labels for training
 # SAD. This should typically be a speaker-adapted system.
 sat_model_dir=exp/tri4
@@ -40,8 +42,8 @@ affix=_1a
 stage=-1
 nj=80
 
-. ./path.sh
-. ./cmd.sh
+. path.sh
+. cmd.sh 
 
 set -e -u -o pipefail
 . utils/parse_options.sh 
@@ -55,7 +57,7 @@ mkdir -p $dir
 
 # See $lang/phones.txt and decide which should be garbage
 garbage_phones="lau spn"
-silence_phones="nsn SIL"
+silence_phones="sil"
 
 for p in $garbage_phones; do 
   for affix in "" "_B" "_E" "_I" "_S"; do
@@ -85,8 +87,10 @@ fi
 # Extract features for the whole data directory
 ###############################################################################
 if [ $stage -le 1 ]; then
-  steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj --write-utt2num-frames true \
-    ${whole_data_dir} || exit 1
+  steps/make_mfcc.sh --nj 50 --cmd "$train_cmd"  --write-utt2num-frames true \
+    $whole_data_dir exp/make_mfcc/train_whole
+  steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/train_whole
+  utils/fix_data_dir.sh $whole_data_dir
 fi
 
 ###############################################################################
@@ -112,18 +116,27 @@ if [ $stage -le 3 ]; then
 fi
 
 if [ $stage -le 4 ]; then
-  utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}_hires_bp
-  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_bp.conf --nj 40 \
-    ${whole_data_dir}_hires_bp
-  steps/compute_cmvn_stats.sh ${whole_data_dir}_hires_bp
+  utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}_hires
+  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 \
+    ${whole_data_dir}_hires
+  steps/compute_cmvn_stats.sh ${whole_data_dir}_hires
 fi
 
+# if [ $stage -le 4.5 ]; then
+#   # Train a TDNN-LSTM network for SAD
+#   local/segmentation/tuning/train_lstm_asr_sad_1a.sh \
+#     --stage $nstage --train-stage $train_stage \
+#     --targets-dir $dir \
+#     --data-dir ${whole_data_dir}_hires
+# fi
+
 if [ $stage -le 5 ]; then
   # Train a TDNN-LSTM network for SAD
-  local/segmentation/tuning/train_lstm_asr_sad_1a.sh \
+
+    local/segmentation/tuning/train_stats_asr_sad_1a.sh \
     --stage $nstage --train-stage $train_stage \
     --targets-dir $dir \
-    --data-dir ${whole_data_dir}_hires_bp
+    --data-dir ${whole_data_dir}_hires
 fi
 
 if [ $stage -le 6 ]; then
@@ -137,9 +150,37 @@ if [ $stage -le 6 ]; then
   steps/segmentation/detect_speech_activity.sh \
     --extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 \
     --extra-left-context-initial 0 --extra-right-context-final 0 \
-    --nj 32 --acwt 0.3 --stage $test_stage \
+    --nj 32 --acwt 0.3 --mfcc-config "conf/mfcc_hires.conf" --stage $test_stage \
     data/eval2000 \
-    exp/segmentation_1a/tdnn_lstm_asr_sad_1a \
-    mfcc_hires_bp \
-    exp/segmentation_1a/tdnn_lstm_asr_sad_1a/{,eval2000}
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2 \
+    mfcc_hires \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/{,eval2000}
+fi
+
+if [ $stage -le 7 ]; then
+  # Do some diagnostics
+  steps/segmentation/evaluate_segmentation.pl data/eval2000/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments &> \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/evaluate_segmentation.log
+  
+  steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/utt2spk \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm
+
+  steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
+    data/eval2000/utt2spk \
+    data/eval2000/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/ref.rttm
+  
+  export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
+  md-eval.pl -c 0.25 -r exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/ref.rttm \
+    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm > \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/md_eval.log
+fi
+
+if [ $stage -le 8 ]; then
+  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg \
+    data/eval2000.seg_asr_sad_1a
 fi
+  
diff --git a/egs/swbd/s5c/local/run_cleanup_segmentation.sh b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
index d08d3f0e0b4..b286f10e0d3 100755
--- a/egs/swbd/s5c/local/run_cleanup_segmentation.sh
+++ b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
-# Copyright 2016  Vimal Manohar
+#           2017  Nagendra Kumar Goel
+#           2016  Vimal Manohar
 #           2016  Johns Hopkins University (author: Daniel Povey)
 # Apache 2.0
 
diff --git a/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh b/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
index 8be70b4715a..81c9193d22e 100755
--- a/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
+++ b/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
-# Copyright 2014  Johns Hopkins University (author: Nagendra K Goel)
+# Copyright    2017  Nagendra Kumar Goel
+#              2014  Johns Hopkins University (author: Nagendra K Goel)
 # Apache 2.0
 
 # This script operates on a directory, such as in exp/segmentation_1a/train_whole_combined_targets_rev1,
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index 63f78aa8092..9ea3e895f95 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -1,12 +1,15 @@
 #!/bin/bash
 
+# Copyright 2017 Nagendra Kumar Goel
+# Apache 2.0
+
 # This is a script to train a TDNN-LSTM for speech activity detection (SAD) 
 # using LSTM for long-context information.
 
 set -o pipefail
 set -u
 
-. ./cmd.sh
+. cmd.sh
 
 # At this script level we don't support not running on GPU, as it would be painfully slow.
 # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
@@ -47,7 +50,7 @@ affix=1a
 data_dir=exp/segmentation_1a/train_whole_hires_bp
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
-. ./cmd.sh
+. cmd.sh
 . ./path.sh
 . ./utils/parse_options.sh
 
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 2dfe9a0bb96..b3a6b6948a3 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -1,12 +1,15 @@
 #!/bin/bash
 
+# Copyright 2017   Nagendra Kumar Goel
+# Apache 2.0
+
 # This is a script to train a TDNN for speech activity detection (SAD) 
 # using statistics pooling for long-context information.
 
 set -o pipefail
 set -u
 
-. ./cmd.sh
+. cmd.sh
 
 # At this script level we don't support not running on GPU, as it would be painfully slow.
 # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
@@ -46,7 +49,7 @@ affix=1a2
 data_dir=exp/segmentation_1a/train_whole_hires_bp
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
-. ./cmd.sh
+. cmd.sh
 . ./path.sh
 . ./utils/parse_options.sh
 
@@ -132,7 +135,7 @@ if [ $stage -le 6 ]; then
 
   copy-feats scp:$targets_dir/targets.scp ark:- | \
     matrix-sum-rows ark:- ark:- | vector-sum --binary=false ark:- - | \
-    awk '{print " [ "$2" "$3" ]"}' > $dir/post_output.vec
+    awk '{print " [ "$2" "$3" "$4" ]"}' > $dir/post_output.vec
 
   echo 3 > $dir/frame_subsampling_factor
 fi

From 36747c4273685b3e88a25b1bacd4e8d3fa2a079e Mon Sep 17 00:00:00 2001
From: Nagendra Kumar Goel <nagendra.goel@govivace.com>
Date: Thu, 11 Jan 2018 12:13:10 -0500
Subject: [PATCH 03/11] path.sh convention and comments update

---
 egs/swbd/s5c/local/run_asr_segmentation.sh               | 9 ++++-----
 egs/swbd/s5c/local/run_cleanup_segmentation.sh           | 2 +-
 .../local/segmentation/tuning/train_lstm_asr_sad_1a.sh   | 3 +--
 .../local/segmentation/tuning/train_stats_asr_sad_1a.sh  | 5 ++---
 egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh     | 1 +
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/egs/swbd/s5c/local/run_asr_segmentation.sh b/egs/swbd/s5c/local/run_asr_segmentation.sh
index d87703d1e90..d986a481f8c 100755
--- a/egs/swbd/s5c/local/run_asr_segmentation.sh
+++ b/egs/swbd/s5c/local/run_asr_segmentation.sh
@@ -4,9 +4,8 @@
 #            2017  Vimal Manohar
 # Apache 2.0
 
-# We assume the run-1-main.sh (because we are using model directories like
-# exp/tri4) and later we assumme run-4-anydecode.sh was run to prepare
-# data/dev10h.pem
+# We assume the run.sh has been executed (because we are using model
+# directories like exp/tri4)
 
 lang=data/lang   # Must match the one used to train the models
 lang_test=data/lang_nosp_sw1_tg  # Lang directory for decoding.
@@ -42,8 +41,8 @@ affix=_1a
 stage=-1
 nj=80
 
-. path.sh
-. cmd.sh 
+. ./cmd.sh
+if [ -f ./path.sh ]; then . ./path.sh; fi
 
 set -e -u -o pipefail
 . utils/parse_options.sh 
diff --git a/egs/swbd/s5c/local/run_cleanup_segmentation.sh b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
index b286f10e0d3..8b08422d277 100755
--- a/egs/swbd/s5c/local/run_cleanup_segmentation.sh
+++ b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
@@ -31,8 +31,8 @@ nj=100
 decode_nj=16
 decode_num_threads=4
 
-. ./path.sh
 . ./cmd.sh
+if [ -f ./path.sh ]; then . ./path.sh; fi
 . utils/parse_options.sh
 
 cleaned_data=${data}_${cleanup_affix}
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index 9ea3e895f95..e3baa67b606 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -50,8 +50,7 @@ affix=1a
 data_dir=exp/segmentation_1a/train_whole_hires_bp
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
-. cmd.sh
-. ./path.sh
+if [ -f ./path.sh ]; then . ./path.sh; fi
 . ./utils/parse_options.sh
 
 if [ -z "$dir" ]; then
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index b3a6b6948a3..842f96ce1b9 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -9,7 +9,7 @@
 set -o pipefail
 set -u
 
-. cmd.sh
+. ./cmd.sh
 
 # At this script level we don't support not running on GPU, as it would be painfully slow.
 # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
@@ -49,8 +49,7 @@ affix=1a2
 data_dir=exp/segmentation_1a/train_whole_hires_bp
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
-. cmd.sh
-. ./path.sh
+if [ -f ./path.sh ]; then . ./path.sh; fi
 . ./utils/parse_options.sh
 
 if [ -z "$dir" ]; then
diff --git a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
index f8557a70177..bc646986eea 100755
--- a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
+++ b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
@@ -1,6 +1,7 @@
 #! /bin/bash
 
 # Copyright 2017  Vimal Manohar
+#           2017  Nagendra Kumar Goel
 # Apache 2.0
   
 # This script prepares targets for training neural network for 

From 6390477ce0ce8e21806886f3362e5625d7c37c8e Mon Sep 17 00:00:00 2001
From: Nagendra Kumar Goel <nagendra.goel@govivace.com>
Date: Fri, 12 Jan 2018 17:25:17 -0500
Subject: [PATCH 04/11] add options for noise and reverberations

---
 egs/swbd/s5c/local/run_asr_segmentation.sh    | 101 +++++++++++++-----
 .../segmentation/combine_targets_dirs.sh      |  83 ++++++++++++++
 .../tuning/train_stats_asr_sad_1a.sh          |  13 +--
 .../steps/segmentation/prepare_targets_gmm.sh |   2 +-
 4 files changed, 163 insertions(+), 36 deletions(-)
 create mode 100755 egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh

diff --git a/egs/swbd/s5c/local/run_asr_segmentation.sh b/egs/swbd/s5c/local/run_asr_segmentation.sh
index d986a481f8c..4d3356dc7b0 100755
--- a/egs/swbd/s5c/local/run_asr_segmentation.sh
+++ b/egs/swbd/s5c/local/run_asr_segmentation.sh
@@ -36,7 +36,8 @@ prepare_targets_stage=-10
 nstage=-10
 train_stage=-10
 test_stage=-10
-
+num_data_reps=1
+base_rirs=simulated
 affix=_1a
 stage=-1
 nj=80
@@ -77,6 +78,7 @@ if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \
 fi
 
 whole_data_dir=${data_dir}_whole
+rvb_data_dir=${whole_data_dir}_rvb
 
 if [ $stage -le 0 ]; then
   utils/data/convert_data_dir_to_whole.sh $data_dir $whole_data_dir
@@ -115,30 +117,76 @@ if [ $stage -le 3 ]; then
 fi
 
 if [ $stage -le 4 ]; then
-  utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}_hires
+    # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+    if [ ! -f rirs_noises.zip ]; then
+	wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+	unzip rirs_noises.zip
+    fi
+
+    rvb_opts=()
+    if [ "$base_rirs" == "simulated" ]; then
+	# This is the config for the system using simulated RIRs and point-source noises
+	rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+	rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+	rvb_opts+=(--noise-set-parameters RIRS_NOISES/pointsource_noises/noise_list)
+    else
+	# This is the config for the JHU ASpIRE submission system
+	rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list")
+	rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list)
+    fi
+
+    foreground_snrs="20:10:15:5:0"
+    background_snrs="20:10:15:5:0"
+    num_reps=1
+    # corrupt the data to generate multi-condition data
+    # for data_dir in train dev test; do
+    python steps/data/reverberate_data_dir.py \
+	   "${rvb_opts[@]}" \
+	   --prefix "rev" \
+	   --foreground-snrs $foreground_snrs \
+	   --background-snrs $background_snrs \
+	   --speech-rvb-probability 0.5 \
+	   --pointsource-noise-addition-probability 0.5 \
+	   --isotropic-noise-addition-probability 0.7 \
+	   --num-replications $num_reps \
+	   --max-noises-per-minute 4 \
+	   --source-sampling-rate 8000 \
+	   $whole_data_dir $rvb_data_dir
+
+    for i in `seq 1 $num_data_reps`; do
+	local/segmentation/copy_targets_dir.sh --cmd "$decode_cmd" --utt-prefix "rev${i}_" exp/segmentation_1a/train_whole_combined_targets_sub3 exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i || exit 1;
+	rvb_dirs+=" exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i"
+    done
+
+    local/segmentation/combine_targets_dirs.sh $rvb_data_dir exp/segmentation_1a/train_whole_combined_targets_sub3_rvb $rvb_dirs || exit 1;
+    cp exp/segmentation_1a/train_whole_combined_targets_sub3_rvb/targets.scp  exp/segmentation_1a/
+fi
+
+if [ $stage -le 5 ]; then
+  utils/copy_data_dir.sh ${rvb_data_dir} ${rvb_data_dir}_hires
   steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 \
-    ${whole_data_dir}_hires
-  steps/compute_cmvn_stats.sh ${whole_data_dir}_hires
+    ${rvb_data_dir}_hires
+  steps/compute_cmvn_stats.sh ${rvb_data_dir}_hires
 fi
 
-# if [ $stage -le 4.5 ]; then
+# if [ $stage -le 6 ]; then
 #   # Train a TDNN-LSTM network for SAD
 #   local/segmentation/tuning/train_lstm_asr_sad_1a.sh \
 #     --stage $nstage --train-stage $train_stage \
 #     --targets-dir $dir \
-#     --data-dir ${whole_data_dir}_hires
+#     --data-dir ${rvb_data_dir}_hires
 # fi
 
-if [ $stage -le 5 ]; then
-  # Train a TDNN-LSTM network for SAD
+if [ $stage -le 6 ]; then
+  # Train a STATS-pooling network for SAD
 
     local/segmentation/tuning/train_stats_asr_sad_1a.sh \
     --stage $nstage --train-stage $train_stage \
     --targets-dir $dir \
-    --data-dir ${whole_data_dir}_hires
+    --data-dir ${rvb_data_dir}_hires
 fi
 
-if [ $stage -le 6 ]; then
+if [ $stage -le 7 ]; then
   # The options to this script must match the options used in the 
   # nnet training script. 
   # e.g. extra-left-context is 70, because the model is an LSTM trained with a 
@@ -149,37 +197,32 @@ if [ $stage -le 6 ]; then
   steps/segmentation/detect_speech_activity.sh \
     --extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 \
     --extra-left-context-initial 0 --extra-right-context-final 0 \
-    --nj 32 --acwt 0.3 --mfcc-config "conf/mfcc_hires.conf" --stage $test_stage \
+    --nj 32 --acwt 0.3 --stage $test_stage \
     data/eval2000 \
     exp/segmentation_1a/tdnn_stats_asr_sad_1a2 \
     mfcc_hires \
     exp/segmentation_1a/tdnn_stats_asr_sad_1a2/{,eval2000}
 fi
 
-if [ $stage -le 7 ]; then
+if [ $stage -le 8 ]; then
   # Do some diagnostics
-  steps/segmentation/evaluate_segmentation.pl data/eval2000/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments &> \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/evaluate_segmentation.log
+  steps/segmentation/evalute_segmentation.pl data/dev10h.pem/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/segments &> \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/evalutate_segmentation.log
   
   steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/utt2spk \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/utt2spk \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/sys.rttm
 
-  steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
-    data/eval2000/utt2spk \
-    data/eval2000/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/ref.rttm
-  
   export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
-  md-eval.pl -c 0.25 -r exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/ref.rttm \
-    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm > \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/md_eval.log
+  md-eval.pl -c 0.25 -r $dev10h_rttm_file \
+    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/sys.rttm > \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/md_eval.log
 fi
 
-if [ $stage -le 8 ]; then
-  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg \
-    data/eval2000.seg_asr_sad_1a
+if [ $stage -le 9 ]; then
+  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg \
+    data/dev10h.seg_asr_sad_1a
 fi
   
diff --git a/egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh b/egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh
new file mode 100755
index 00000000000..48c4ce93db0
--- /dev/null
+++ b/egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# Copyright 2017 Nagendra Kumar Goel
+# Apache 2.0.
+
+# This script operates on targets directories, such as exp/segmentation_1a/train_whole_combined_targets_sub3
+# the output is a new targets dir which has targets from all the input targets dirs
+
+# Begin configuration section.
+cmd=run.pl
+extra_files=
+num_jobs=4  # NOTE: accepted as an option but not referenced anywhere below.
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [[ $# -lt 3 ]]; then
+  echo "Usage: $0 [options] <data> <dest-targets-dir> <src-targets-dir1> <src-targets-dir2> ..."
+  echo "e.g.: $0 --num-jobs 32 data/train exp/targets_combined exp/targets_1 exp/targets_2"
+  echo "Options:"
+  echo " --extra-files <file1 file2...>   # specify additional files in 'src-targets-dir1' to copy"
+  echo " --num-jobs <nj>                  # number of jobs used to split the data directory."
+  echo " Note, files that don't appear in the first source dir will not be added even if they appear in later ones."
+  echo " Other than targets, only files from the first source targets dir are copied."
+  exit 1;
+fi
+
+data=$1;  # NOTE: <data> is required positionally but not referenced below.
+shift;
+dest=$1;
+shift;
+first_src=$1;
+
+mkdir -p $dest;
+rm $dest/{targets.*.ark,frame_subsampling_factor} 2>/dev/null
+
+cp $first_src/frame_subsampling_factor $dest 2>/dev/null
+
+export LC_ALL=C
+
+for dir in "$@"; do
+  if [ ! -f $dir/targets.1.ark ]; then
+    echo "$0: check if targets (targets.*.ark) are present in $dir."
+    exit 1;
+  fi
+done
+
+for dir in "$@"; do
+  for f in frame_subsampling_factor; do
+    if ! diff $first_src/$f $dir/$f 1>/dev/null 2>&1; then
+      echo "$0: Cannot combine targets directories with different $f files."
+      exit 1;  # combining targets produced at different frame rates would be silently wrong
+    fi
+  done
+done
+
+for f in frame_subsampling_factor $extra_files; do
+  if [ ! -f $first_src/$f ]; then
+    echo "$0: no such file $first_src/$f"
+    exit 1;
+  fi
+  cp $first_src/$f $dest/
+done
+
+src_id=0
+temp_dir=$dest/temp
+[ -d $temp_dir ] && rm -r $temp_dir;
+mkdir -p $temp_dir
+echo "$0: dumping targets in each source directory as single archive and index."
+for dir in "$@"; do
+  src_id=$((src_id + 1))
+  cur_num_jobs=$(ls $dir/targets.*.ark | wc -l) || exit 1;
+  tgts=$(for n in $(seq $cur_num_jobs); do echo -n "$dir/targets.$n.ark "; done)
+  $cmd $dir/log/copy_targets.log \
+    copy-matrix "ark:cat $tgts|" \
+    ark,scp:$temp_dir/targets.$src_id.ark,$temp_dir/targets.$src_id.scp || exit 1;
+done
+# sort -m only merges: each per-source targets.N.scp is assumed to be already sorted (TODO confirm).
+sort -m $temp_dir/targets.*.scp > $dest/targets.scp || exit 1;
+
+echo "Combined targets and stored in $dest"
+exit 0
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 842f96ce1b9..feb88a53454 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -1,15 +1,15 @@
 #!/bin/bash
 
 # Copyright 2017   Nagendra Kumar Goel
+#           2016   Vimal Manohar
 # Apache 2.0
-
 # This is a script to train a TDNN for speech activity detection (SAD) 
 # using statistics pooling for long-context information.
 
 set -o pipefail
 set -u
 
-. ./cmd.sh
+. cmd.sh
 
 # At this script level we don't support not running on GPU, as it would be painfully slow.
 # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
@@ -30,11 +30,11 @@ extra_right_context=21
 relu_dim=256
 
 # training options
-num_epochs=4
+num_epochs=2
 initial_effective_lrate=0.0003
 final_effective_lrate=0.00003
-num_jobs_initial=3
-num_jobs_final=8
+num_jobs_initial=1
+num_jobs_final=1
 remove_egs=true
 max_param_change=0.2  # Small max-param change for small network
 
@@ -49,6 +49,7 @@ affix=1a2
 data_dir=exp/segmentation_1a/train_whole_hires_bp
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
+. cmd.sh
 if [ -f ./path.sh ]; then . ./path.sh; fi
 . ./utils/parse_options.sh
 
@@ -134,7 +135,7 @@ if [ $stage -le 6 ]; then
 
   copy-feats scp:$targets_dir/targets.scp ark:- | \
     matrix-sum-rows ark:- ark:- | vector-sum --binary=false ark:- - | \
-    awk '{print " [ "$2" "$3" "$4" ]"}' > $dir/post_output.vec
+    awk '{print " [ "$2" "$3" ]"}' > $dir/post_output.vec
 
   echo 3 > $dir/frame_subsampling_factor
 fi
diff --git a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
index bc646986eea..de19cfc6772 100755
--- a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
+++ b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
@@ -211,7 +211,7 @@ if [ $stage -le 5 ]; then
   # the speech / silence decisions, not the exact word sequences.
   steps/decode.sh --cmd "$decode_cmd --mem 2G" --nj $nj \
     --max-active 1000 --beam 10.0 \
-    --decode-extra-opts "--word-determinize=false" --skip-scoring true \
+    --skip-scoring true \
     $graph_dir $uniform_seg_data_dir $decode_dir
 fi
 

From b62c2a87ce2bbdbc65f48afd6f6675b97c68c7f7 Mon Sep 17 00:00:00 2001
From: Nagendra Kumar Goel <nagendra.goel@govivace.com>
Date: Tue, 16 Jan 2018 08:46:49 -0500
Subject: [PATCH 05/11] Fix bugs in evaluations part

---
 egs/swbd/s5c/local/run_asr_segmentation.sh    | 30 +++++++++----------
 .../tuning/train_lstm_asr_sad_1a.sh           |  2 +-
 .../tuning/train_stats_asr_sad_1a.sh          | 12 ++++----
 .../segmentation/detect_speech_activity.sh    | 12 ++++----
 4 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/egs/swbd/s5c/local/run_asr_segmentation.sh b/egs/swbd/s5c/local/run_asr_segmentation.sh
index 4d3356dc7b0..7129e905480 100755
--- a/egs/swbd/s5c/local/run_asr_segmentation.sh
+++ b/egs/swbd/s5c/local/run_asr_segmentation.sh
@@ -36,7 +36,7 @@ prepare_targets_stage=-10
 nstage=-10
 train_stage=-10
 test_stage=-10
-num_data_reps=1
+num_data_reps=2
 base_rirs=simulated
 affix=_1a
 stage=-1
@@ -164,7 +164,7 @@ fi
 
 if [ $stage -le 5 ]; then
   utils/copy_data_dir.sh ${rvb_data_dir} ${rvb_data_dir}_hires
-  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 \
+  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 10 \
     ${rvb_data_dir}_hires
   steps/compute_cmvn_stats.sh ${rvb_data_dir}_hires
 fi
@@ -206,23 +206,23 @@ fi
 
 if [ $stage -le 8 ]; then
   # Do some diagnostics
-  steps/segmentation/evalute_segmentation.pl data/dev10h.pem/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/segments &> \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/evalutate_segmentation.log
+  steps/segmentation/evaluate_segmentation.pl data/eval2000/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments &> \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/evalutate_segmentation.log
   
   steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/utt2spk \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/sys.rttm
-
-  export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
-  md-eval.pl -c 0.25 -r $dev10h_rttm_file \
-    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/sys.rttm > \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg/md_eval.log
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/utt2spk \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm
+
+#  export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
+#  md-eval.pl -c 0.25 -r $eval2000_rttm_file \
+#    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm > \
+#    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/md_eval.log
 fi
 
 if [ $stage -le 9 ]; then
-  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a2/dev10h_seg \
-    data/dev10h.seg_asr_sad_1a
+  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg \
+    data/eval2000.seg_asr_sad_1a
 fi
   
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index e3baa67b606..74697df099f 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -9,7 +9,7 @@
 set -o pipefail
 set -u
 
-. cmd.sh
+. ./cmd.sh
 
 # At this script level we don't support not running on GPU, as it would be painfully slow.
 # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index feb88a53454..3254929306f 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -9,7 +9,7 @@
 set -o pipefail
 set -u
 
-. cmd.sh
+. ./cmd.sh
 
 # At this script level we don't support not running on GPU, as it would be painfully slow.
 # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
@@ -30,7 +30,7 @@ extra_right_context=21
 relu_dim=256
 
 # training options
-num_epochs=2
+num_epochs=1
 initial_effective_lrate=0.0003
 final_effective_lrate=0.00003
 num_jobs_initial=1
@@ -46,7 +46,7 @@ config_dir=
 dir=
 affix=1a2
 
-data_dir=exp/segmentation_1a/train_whole_hires_bp
+data_dir=exp/segmentation_1a/train_whole_rvb_hires
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
 . cmd.sh
@@ -132,10 +132,12 @@ if [ $stage -le 6 ]; then
     --targets-scp="$targets_dir/targets.scp" \
     --egs.opts="--frame-subsampling-factor 3 --num-utts-subset $num_utts_subset" \
     --dir=$dir || exit 1
+fi
 
-  copy-feats scp:$targets_dir/targets.scp ark:- | \
+if [ $stage -le 7 ]; then
+    copy-feats scp:$targets_dir/targets.scp ark:- | \
     matrix-sum-rows ark:- ark:- | vector-sum --binary=false ark:- - | \
-    awk '{print " [ "$2" "$3" ]"}' > $dir/post_output.vec
+    awk '{print " [ "$2" "$3" "$4" ]"}' > $dir/post_output.vec
 
   echo 3 > $dir/frame_subsampling_factor
 fi
diff --git a/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh b/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh
index 69f47c28d60..9bc8eea675c 100755
--- a/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh
+++ b/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 # Copyright 2016-17  Vimal Manohar
+#              2017  Nagendra Kumar Goel
 # Apache 2.0.
 
 # This script does nnet3-based speech activity detection given an input 
@@ -12,16 +13,17 @@ set -e
 set -o pipefail
 set -u
 
-. ./path.sh
+. ./cmd.sh
+if [ -f ./path.sh ]; then . ./path.sh; fi
 
 affix=  # Affix for the segmentation
 nj=32
-cmd=queue.pl
+cmd=$decode_cmd
 stage=-1
 
 # Feature options (Must match training)
-mfcc_config=conf/mfcc_hires_bp.conf
-feat_affix=bp   # Affix for the type of feature used
+mfcc_config=conf/mfcc_hires.conf
+feat_affix=hires   # Affix for the type of feature used
 
 convert_data_dir_to_whole=true    # If true, the input data directory is 
                                   # first converted to whole data directory (i.e. whole recordings)
@@ -67,7 +69,7 @@ if [ $# -ne 5 ]; then
   echo "See script for details of the options to be supplied."
   echo "Usage: $0 <src-data-dir> <sad-nnet-dir> <mfcc-dir> <work-dir> <out-data-dir>"
   echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev exp/nnet3_sad_snr/nnet_tdnn_j_n4 \\"
-  echo "    mfcc_hires_bp exp/segmentation_sad_snr/nnet_tdnn_j_n4 data/ami_sdm1_dev"
+  echo "    mfcc_hires exp/segmentation_sad_snr/nnet_tdnn_j_n4 data/ami_sdm1_dev"
   echo ""
   echo "Options: "
   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."

From ce3cbab528f14db3bcaa4d15391004a101a0e19b Mon Sep 17 00:00:00 2001
From: Vimal Manohar <vimal.manohar91@gmail.com>
Date: Thu, 25 Jan 2018 18:20:21 -0500
Subject: [PATCH 06/11] Simplifying recipe

---
 egs/swbd/s5c/local/run_asr_segmentation.sh    | 68 +++++++--------
 .../s5c/local/run_cleanup_segmentation.sh     | 15 ++--
 .../segmentation/combine_targets_dirs.sh      | 83 -------------------
 .../local/segmentation/copy_targets_dir.sh    | 77 -----------------
 .../tuning/train_lstm_asr_sad_1a.sh           |  3 +-
 .../tuning/train_stats_asr_sad_1a.sh          | 13 +--
 .../segmentation/combine_targets_dirs.sh      | 55 ++++++++++++
 .../s5/steps/segmentation/copy_targets_dir.sh | 46 ++++++++++
 .../segmentation/detect_speech_activity.sh    |  5 +-
 .../steps/segmentation/prepare_targets_gmm.sh |  3 +-
 10 files changed, 148 insertions(+), 220 deletions(-)
 delete mode 100755 egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh
 delete mode 100755 egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
 create mode 100755 egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh
 create mode 100755 egs/wsj/s5/steps/segmentation/copy_targets_dir.sh

diff --git a/egs/swbd/s5c/local/run_asr_segmentation.sh b/egs/swbd/s5c/local/run_asr_segmentation.sh
index 7129e905480..21c20b0a423 100755
--- a/egs/swbd/s5c/local/run_asr_segmentation.sh
+++ b/egs/swbd/s5c/local/run_asr_segmentation.sh
@@ -10,7 +10,7 @@
 lang=data/lang   # Must match the one used to train the models
 lang_test=data/lang_nosp_sw1_tg  # Lang directory for decoding.
 
-data_dir=data/train 
+data_dir=data/train
 # Model directory used to align the $data_dir to get target labels for training
 # SAD. This should typically be a speaker-adapted system.
 sat_model_dir=exp/tri4
@@ -37,7 +37,6 @@ nstage=-10
 train_stage=-10
 test_stage=-10
 num_data_reps=2
-base_rirs=simulated
 affix=_1a
 stage=-1
 nj=80
@@ -113,6 +112,7 @@ if [ $stage -le 3 ]; then
     --nj 80 --reco-nj 40 --lang-test $lang_test \
     --garbage-phones-list $dir/garbage_phones.txt \
     --silence-phones-list $dir/silence_phones.txt \
+    --merge-weights $merge_weights \
     $lang $data_dir $whole_data_dir $sat_model_dir $model_dir $dir
 fi
 
@@ -124,20 +124,14 @@ if [ $stage -le 4 ]; then
     fi
 
     rvb_opts=()
-    if [ "$base_rirs" == "simulated" ]; then
-	# This is the config for the system using simulated RIRs and point-source noises
-	rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
-	rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
-	rvb_opts+=(--noise-set-parameters RIRS_NOISES/pointsource_noises/noise_list)
-    else
-	# This is the config for the JHU ASpIRE submission system
-	rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list")
-	rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list)
-    fi
+    # This is the config for the system using simulated RIRs and point-source noises
+    rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+    rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+    rvb_opts+=(--noise-set-parameters RIRS_NOISES/pointsource_noises/noise_list)
 
     foreground_snrs="20:10:15:5:0"
     background_snrs="20:10:15:5:0"
-    num_reps=1
+    num_data_reps=1
     # corrupt the data to generate multi-condition data
     # for data_dir in train dev test; do
     python steps/data/reverberate_data_dir.py \
@@ -148,41 +142,36 @@ if [ $stage -le 4 ]; then
 	   --speech-rvb-probability 0.5 \
 	   --pointsource-noise-addition-probability 0.5 \
 	   --isotropic-noise-addition-probability 0.7 \
-	   --num-replications $num_reps \
+	   --num-replications $num_data_reps \
 	   --max-noises-per-minute 4 \
 	   --source-sampling-rate 8000 \
 	   $whole_data_dir $rvb_data_dir
 
+    rvb_dirs=()
     for i in `seq 1 $num_data_reps`; do
-	local/segmentation/copy_targets_dir.sh --cmd "$decode_cmd" --utt-prefix "rev${i}_" exp/segmentation_1a/train_whole_combined_targets_sub3 exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i || exit 1;
-	rvb_dirs+=" exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i"
+      steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
+        exp/segmentation_1a/train_whole_combined_targets_sub3 \
+        exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i || exit 1;
+      rvb_dirs+=(exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i)
     done
 
-    local/segmentation/combine_targets_dirs.sh $rvb_data_dir exp/segmentation_1a/train_whole_combined_targets_sub3_rvb $rvb_dirs || exit 1;
-    cp exp/segmentation_1a/train_whole_combined_targets_sub3_rvb/targets.scp  exp/segmentation_1a/
+    steps/segmentation/combine_targets_dirs.sh \
+      $rvb_data_dir exp/segmentation_1a/train_whole_combined_targets_sub3_rvb \
+      $rvb_dirs || exit 1;
 fi
 
 if [ $stage -le 5 ]; then
   utils/copy_data_dir.sh ${rvb_data_dir} ${rvb_data_dir}_hires
-  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 10 \
+  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 80 \
     ${rvb_data_dir}_hires
   steps/compute_cmvn_stats.sh ${rvb_data_dir}_hires
 fi
 
-# if [ $stage -le 6 ]; then
-#   # Train a TDNN-LSTM network for SAD
-#   local/segmentation/tuning/train_lstm_asr_sad_1a.sh \
-#     --stage $nstage --train-stage $train_stage \
-#     --targets-dir $dir \
-#     --data-dir ${rvb_data_dir}_hires
-# fi
-
 if [ $stage -le 6 ]; then
   # Train a STATS-pooling network for SAD
-
     local/segmentation/tuning/train_stats_asr_sad_1a.sh \
     --stage $nstage --train-stage $train_stage \
-    --targets-dir $dir \
+    --targets-dir exp/segmentation_1a/train_whole_combined_targets_sub3_rvb \
     --data-dir ${rvb_data_dir}_hires
 fi
 
@@ -199,30 +188,29 @@ if [ $stage -le 7 ]; then
     --extra-left-context-initial 0 --extra-right-context-final 0 \
     --nj 32 --acwt 0.3 --stage $test_stage \
     data/eval2000 \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2 \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a \
     mfcc_hires \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/{,eval2000}
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a/{,eval2000}
 fi
 
 if [ $stage -le 8 ]; then
   # Do some diagnostics
   steps/segmentation/evaluate_segmentation.pl data/eval2000/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments &> \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/evalutate_segmentation.log
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/segments &> \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/evalutate_segmentation.log
   
   steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/utt2spk \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/utt2spk \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/segments \
+    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/sys.rttm
 
 #  export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
 #  md-eval.pl -c 0.25 -r $eval2000_rttm_file \
-#    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/sys.rttm > \
-#    exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg/md_eval.log
+#    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/sys.rttm > \
+#    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/md_eval.log
 fi
 
 if [ $stage -le 9 ]; then
-  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a2/eval2000_seg \
+  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg \
     data/eval2000.seg_asr_sad_1a
 fi
-  
diff --git a/egs/swbd/s5c/local/run_cleanup_segmentation.sh b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
index 8b08422d277..c879a55d16a 100755
--- a/egs/swbd/s5c/local/run_cleanup_segmentation.sh
+++ b/egs/swbd/s5c/local/run_cleanup_segmentation.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
 
-#           2017  Nagendra Kumar Goel
-#           2016  Vimal Manohar
-#           2016  Johns Hopkins University (author: Daniel Povey)
+# Copyright   2016  Vimal Manohar
+#             2016  Johns Hopkins University (author: Daniel Povey)
+#             2017  Nagendra Kumar Goel
 # Apache 2.0
 
 # This script demonstrates how to re-segment training data selecting only the
@@ -23,9 +23,9 @@ set -u
 
 stage=0
 cleanup_stage=0
-data=data/train
+data=data/train_nodup
 cleanup_affix=cleaned
-srcdir=exp/tri4_mmi_b0.1
+srcdir=exp/tri4
 langdir=data/lang_sw1_tg
 nj=100
 decode_nj=16
@@ -42,7 +42,8 @@ cleaned_dir=${srcdir}_${cleanup_affix}
 
 if [ $stage -le 1 ]; then
   # This does the actual data cleanup.
-  steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \
+  steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage \
+    --nj $nj --cmd "$train_cmd" \
     $data $langdir $srcdir $dir $cleaned_data
 fi
 
@@ -53,5 +54,5 @@ fi
 
 if [ $stage -le 3 ]; then
   steps/train_sat.sh --cmd "$train_cmd" \
-    5000 100000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir}
+    11500 200000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir}
 fi
diff --git a/egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh b/egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh
deleted file mode 100755
index 48c4ce93db0..00000000000
--- a/egs/swbd/s5c/local/segmentation/combine_targets_dirs.sh
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/bin/bash
-# Copyright 2017 Nagendra Kumar Goel
-# Apache 2.0.
-
-# This srcipt operates on targets directories, such as exp/segmentation_1a/train_whole_combined_targets_sub3
-# the output is a new targets dir which has targets from all the input targets dirs
-
-# Begin configuration section.
-cmd=run.pl
-extra_files=
-num_jobs=4
-# End configuration section.
-echo "$0 $@"  # Print the command line for logging
-
-if [ -f path.sh ]; then . ./path.sh; fi
-. parse_options.sh || exit 1;
-
-if [[ $# -lt 3 ]]; then
-  echo "Usage: $0 [options] <data> <dest-targets-dir> <src-targets-dir1> <src-targets-dir2> ..."
-  echo "e.g.: $0 --num-jobs 32 data/train exp/targets_combined exp/targets_1 exp/targets_2"
-  echo "Options:"
-  echo " --extra-files <file1 file2...>   # specify addtional files in 'src-targets-dir1' to copy"
-  echo " --num-jobs <nj>                  # number of jobs used to split the data directory."
-  echo " Note, files that don't appear in the first source dir will not be added even if they appear in later ones."
-  echo " Other than alignments, only files from the first src ali dir are copied."
-  exit 1;
-fi
-
-data=$1;
-shift;
-dest=$1;
-shift;
-first_src=$1;
-
-mkdir -p $dest;
-rm $dest/{targets.*.ark,frame_subsampling_factor} 2>/dev/null
-
-cp $first_src/frame_subsampling_factor $dest 2>/dev/null
-
-export LC_ALL=C
-
-for dir in $*; do
-  if [ ! -f $dir/targets.1.ark ]; then
-    echo "$0: check if targets (targets.*.ark) are present in $dir."
-    exit 1;
-  fi
-done
-
-for dir in $*; do
-  for f in frame_subsampling_factor; do
-    diff $first_src/$f $dir/$f 1>/dev/null 2>&1
-    if [ $? -ne 0 ]; then
-      echo "$0: Cannot combine alignment directories with different $f files."
-    fi
-  done
-done
-
-for f in frame_subsampling_factor $extra_files; do
-  if [ ! -f $first_src/$f ]; then
-    echo "combine_targets_dir.sh: no such file $first_src/$f"
-    exit 1;
-  fi
-  cp $first_src/$f $dest/
-done
-
-src_id=0
-temp_dir=$dest/temp
-[ -d $temp_dir ] && rm -r $temp_dir;
-mkdir -p $temp_dir
-echo "$0: dumping targets in each source directory as single archive and index."
-for dir in $*; do
-  src_id=$((src_id + 1))
-  cur_num_jobs=$(ls $dir/targets.*.ark | wc -l) || exit 1;
-  tgts=$(for n in $(seq $cur_num_jobs); do echo -n "$dir/targets.$n.ark "; done)
-  $cmd $dir/log/copy_targets.log \
-    copy-matrix "ark:cat $tgts|" \
-    ark,scp:$temp_dir/targets.$src_id.ark,$temp_dir/targets.$src_id.scp || exit 1;
-done
-sort -m $temp_dir/targets.*.scp > $dest/targets.scp || exit 1;
-
-
-echo "Combined targets and stored in $dest"
-exit 0
diff --git a/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh b/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
deleted file mode 100755
index 81c9193d22e..00000000000
--- a/egs/swbd/s5c/local/segmentation/copy_targets_dir.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-
-# Copyright    2017  Nagendra Kumar Goel
-#              2014  Johns Hopkins University (author: Nagendra K Goel)
-# Apache 2.0
-
-# This script operates on a directory, such as in exp/segmentation_1a/train_whole_combined_targets_rev1,
-# that contains some subset of the following files:
-# targets.X.ark
-# frame_subsampling_factor
-# It copies to another directory, possibly adding a specified prefix or a suffix
-# to the utterance names.
-
-
-# begin configuration section
-utt_prefix=
-utt_suffix=
-cmd=run.pl
-# end configuration section
-
-. utils/parse_options.sh
-
-if [ $# != 2 ]; then
-  echo "Usage: "
-  echo "  $0 [options] <src_dir> <dest_dir>"
-  echo "e.g.:"
-  echo " $0  --utt-prefix=1- exp/segmentation_1a/train_whole_combined_targets_sub3 exp/segmentation_1a/train_whole_combined_targets_sub3_rev1"
-  echo "Options"
-  echo "   --utt-prefix=<prefix>     # Prefix for utterance ids, default empty"
-  echo "   --utt-suffix=<suffix>     # Suffix for utterance ids, default empty"
-  exit 1;
-fi
-
-
-export LC_ALL=C
-
-src_dir=$1
-dest_dir=$2
-
-mkdir -p $dest_dir
-
-if [ ! -f $src_dir/targets.1.ark ]; then
-  echo "copy_targets_dir.sh: no such files $src_dir/targets.1.ark"
-  exit 1;
-fi
-
-for f in frame_subsampling_factor; do
-  if [ ! -f $src_dir/$f ]; then
-    echo "$0: no such file $src_dir/$f this might be serious error."
-    continue
-  fi
-  cp $src_dir/$f $dest_dir/
-done
-
-nj=$(ls $src_dir/targets.*.ark | wc -l)
-mkdir -p $dest_dir/temp
-cat << EOF > $dest_dir/temp/copy_targets.sh
-set -e;
-id=\$1
-echo "$src_dir/targets.\$id.ark"
-copy-matrix ark:$src_dir/targets.\$id.ark ark,t:- | \
-python -c "
-import sys
-for line in sys.stdin:
-      parts = line.split()
-      if \"[\" not in line:
-            print line.rstrip()
-      else:
-            print '$utt_prefix{0}$utt_suffix {1}'.format(parts[0], ' '.join(parts[1:]))
-" | \
-  copy-matrix ark,t:- ark:$dest_dir/targets.\$id.ark || exit 1;
-set +o pipefail; # unset the pipefail option.
-EOF
-chmod +x $dest_dir/temp/copy_targets.sh
-$cmd -v PATH JOB=1:$nj $dest_dir/temp/copy_targets.JOB.log $dest_dir/temp/copy_targets.sh JOB || exit 1;
-
-echo "$0: copied targets from $src_dir to $dest_dir"
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index 74697df099f..13318756e43 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -9,8 +9,6 @@
 set -o pipefail
 set -u
 
-. ./cmd.sh
-
 # At this script level we don't support not running on GPU, as it would be painfully slow.
 # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
 # --num-threads 16 and --minibatch-size 128.
@@ -50,6 +48,7 @@ affix=1a
 data_dir=exp/segmentation_1a/train_whole_hires_bp
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
+. ./cmd.sh
 if [ -f ./path.sh ]; then . ./path.sh; fi
 . ./utils/parse_options.sh
 
diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 3254929306f..96009c69374 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -33,8 +33,8 @@ relu_dim=256
 num_epochs=1
 initial_effective_lrate=0.0003
 final_effective_lrate=0.00003
-num_jobs_initial=1
-num_jobs_final=1
+num_jobs_initial=3
+num_jobs_final=8
 remove_egs=true
 max_param_change=0.2  # Small max-param change for small network
 
@@ -49,7 +49,7 @@ affix=1a2
 data_dir=exp/segmentation_1a/train_whole_rvb_hires
 targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3
 
-. cmd.sh
+. ./cmd.sh
 if [ -f ./path.sh ]; then . ./path.sh; fi
 . ./utils/parse_options.sh
 
@@ -135,9 +135,10 @@ if [ $stage -le 6 ]; then
 fi
 
 if [ $stage -le 7 ]; then
-    copy-feats scp:$targets_dir/targets.scp ark:- | \
-    matrix-sum-rows ark:- ark:- | vector-sum --binary=false ark:- - | \
-    awk '{print " [ "$2" "$3" "$4" ]"}' > $dir/post_output.vec
+  # Use a subset to compute prior over the output targets
+  $cmd $dir/log/get_priors.log \
+    matrix-sum-rows "scp:utils/subset_scp.pl --quiet 1000 $targets_dir/targets.scp |" \
+    ark:- \| vector-sum --binary=false ark:- $dir/post_output.vec || exit 1
 
   echo 3 > $dir/frame_subsampling_factor
 fi
diff --git a/egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh b/egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh
new file mode 100755
index 00000000000..f6be21e16f3
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright 2017 Nagendra Kumar Goel
+#           2018 Vimal Manohar   
+# Apache 2.0.
+
+# This script combines several targets directories into a new targets directory
+# containing targets from all the input targets directories.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# -lt 3 ]; then
+  echo "Usage: $0 [options] <data> <dest-targets-dir> <src-targets-dir1> <src-targets-dir2> ..."
+  echo "e.g.: $0 data/train exp/targets_combined exp/targets_1 exp/targets_2"
+  exit 1;
+fi
+
+export LC_ALL=C
+
+data=$1;
+shift;
+dest=$1;
+shift;
+first_src=$1;
+
+mkdir -p $dest;
+rm -f $dest/{targets.*.ark,frame_subsampling_factor} 2>/dev/null
+
+frame_subsampling_factor=1
+if [ -f $first_src/frame_subsampling_factor ]; then
+  cp $first_src/frame_subsampling_factor $dest
+  frame_subsampling_factor=$(cat $dest/frame_subsampling_factor)
+fi
+
+for d in $*; do
+  this_frame_subsampling_factor=1
+  if [ -f $d/frame_subsampling_factor ]; then
+    this_frame_subsampling_factor=$(cat $d/frame_subsampling_factor)
+  fi
+
+  if [ $this_frame_subsampling_factor != $frame_subsampling_factor ]; then
+    echo "$0: Cannot combine targets directories with different frame-subsampling-factors" 1>&2
+    exit 1
+  fi
+
+  cat $d/targets.scp
+done | sort -k1,1 > $dest/targets.scp || exit 1
+
+steps/segmentation/verify_targets_dir.sh $data $dest || exit 1
+
+echo "Combined targets and stored in $dest"
+exit 0
diff --git a/egs/wsj/s5/steps/segmentation/copy_targets_dir.sh b/egs/wsj/s5/steps/segmentation/copy_targets_dir.sh
new file mode 100755
index 00000000000..f15206b1f7d
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/copy_targets_dir.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright    2017  Nagendra Kumar Goel
+#              2014  Johns Hopkins University (author: Nagendra K Goel)
+# Apache 2.0
+
+# This script makes a copy of a targets directory (by copying targets.scp),
+# possibly adding a specified prefix or a suffix to the utterance names.
+
+# begin configuration section
+utt_prefix=
+utt_suffix=
+# end configuration section
+
+if [ -f ./path.sh ]; then . ./path.sh; fi
+. ./utils/parse_options.sh
+
+if [ $# != 2 ]; then
+  echo "Usage: "
+  echo "  $0 [options] <srcdir> <destdir>"
+  echo "e.g.:"
+  echo " $0  --utt-prefix=1- exp/segmentation_1a/train_whole_combined_targets_sub3 exp/segmentation_1a/train_whole_combined_targets_sub3_rev1"
+  echo "Options"
+  echo "   --utt-prefix=<prefix>     # Prefix for utterance ids, default empty"
+  echo "   --utt-suffix=<suffix>     # Suffix for utterance ids, default empty"
+  exit 1;
+fi
+
+export LC_ALL=C
+
+srcdir=$1
+destdir=$2
+
+mkdir -p $destdir
+
+if [ -f $srcdir/frame_subsampling_factor ]; then
+  cp $srcdir/frame_subsampling_factor $destdir
+fi
+
+cat $srcdir/targets.scp | awk -v p=$utt_prefix -v s=$utt_suffix \
+  '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
+
+cat $srcdir/targets.scp | utils/apply_map.pl -f 1 $destdir/utt_map | \
+  sort -k1,1 > $destdir/targets.scp
+
+echo "$0: copied targets from $srcdir to $destdir"
diff --git a/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh b/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh
index 9bc8eea675c..60e3df20df2 100755
--- a/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh
+++ b/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh
@@ -13,17 +13,16 @@ set -e
 set -o pipefail
 set -u
 
-. ./cmd.sh
 if [ -f ./path.sh ]; then . ./path.sh; fi
 
 affix=  # Affix for the segmentation
 nj=32
-cmd=$decode_cmd
+cmd=queue.pl
 stage=-1
 
 # Feature options (Must match training)
 mfcc_config=conf/mfcc_hires.conf
-feat_affix=hires   # Affix for the type of feature used
+feat_affix=   # Affix for the type of feature used
 
 convert_data_dir_to_whole=true    # If true, the input data directory is 
                                   # first converted to whole data directory (i.e. whole recordings)
diff --git a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
index de19cfc6772..f8557a70177 100755
--- a/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
+++ b/egs/wsj/s5/steps/segmentation/prepare_targets_gmm.sh
@@ -1,7 +1,6 @@
 #! /bin/bash
 
 # Copyright 2017  Vimal Manohar
-#           2017  Nagendra Kumar Goel
 # Apache 2.0
   
 # This script prepares targets for training neural network for 
@@ -211,7 +210,7 @@ if [ $stage -le 5 ]; then
   # the speech / silence decisions, not the exact word sequences.
   steps/decode.sh --cmd "$decode_cmd --mem 2G" --nj $nj \
     --max-active 1000 --beam 10.0 \
-    --skip-scoring true \
+    --decode-extra-opts "--word-determinize=false" --skip-scoring true \
     $graph_dir $uniform_seg_data_dir $decode_dir
 fi
 

From b43e5dcb87ed7d04725c8e69c82f9e1779ca20a0 Mon Sep 17 00:00:00 2001
From: Vimal Manohar <vimal.manohar91@gmail.com>
Date: Fri, 26 Jan 2018 12:22:05 -0500
Subject: [PATCH 07/11] simplifying stuff

---
 egs/swbd/s5c/local/run_asr_segmentation.sh | 78 ++++++++++------------
 1 file changed, 37 insertions(+), 41 deletions(-)

diff --git a/egs/swbd/s5c/local/run_asr_segmentation.sh b/egs/swbd/s5c/local/run_asr_segmentation.sh
index 21c20b0a423..4bc43007aca 100755
--- a/egs/swbd/s5c/local/run_asr_segmentation.sh
+++ b/egs/swbd/s5c/local/run_asr_segmentation.sh
@@ -10,7 +10,7 @@
 lang=data/lang   # Must match the one used to train the models
 lang_test=data/lang_nosp_sw1_tg  # Lang directory for decoding.
 
-data_dir=data/train
+data_dir=data/train_nodup
 # Model directory used to align the $data_dir to get target labels for training
 # SAD. This should typically be a speaker-adapted system.
 sat_model_dir=exp/tri4
@@ -18,15 +18,8 @@ sat_model_dir=exp/tri4
 # get target labels for training SAD. This should typically be a 
 # speaker-independent system like LDA+MLLT system.
 model_dir=exp/tri3
-graph_dir=    # If not provided, a new one will be created using $lang_test
-
-# Uniform segmentation options for decoding whole recordings. All values are in
-# seconds.
-max_segment_duration=10
-overlap_duration=2.5
-max_remaining_duration=5  # If the last remaining piece when splitting uniformly
-                          # is smaller than this duration, then the last piece 
-                          # is  merged with the previous.
+graph_dir=    # Graph for decoding whole-recording version of $data_dir.
+              # If not provided, a new one will be created using $lang_test
 
 # List of weights on labels obtained from alignment, 
 # labels obtained from decoding and default labels in out-of-segment regions
@@ -37,7 +30,7 @@ nstage=-10
 train_stage=-10
 test_stage=-10
 num_data_reps=2
-affix=_1a
+affix=_1a   # For segmentation
 stage=-1
 nj=80
 
@@ -77,7 +70,10 @@ if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \
 fi
 
 whole_data_dir=${data_dir}_whole
-rvb_data_dir=${whole_data_dir}_rvb
+targets_dir=exp/segmentation${affix}/train_whole_combined_targets_sub3
+
+rvb_data_dir=${whole_data_dir}_rvb_hires
+rvb_targets_dir=${targets_dir}_rvb
 
 if [ $stage -le 0 ]; then
   utils/data/convert_data_dir_to_whole.sh $data_dir $whole_data_dir
@@ -112,15 +108,16 @@ if [ $stage -le 3 ]; then
     --nj 80 --reco-nj 40 --lang-test $lang_test \
     --garbage-phones-list $dir/garbage_phones.txt \
     --silence-phones-list $dir/silence_phones.txt \
-    --merge-weights $merge_weights \
+    --merge-weights "$merge_weights" \
+    --graph-dir "$graph_dir" \
     $lang $data_dir $whole_data_dir $sat_model_dir $model_dir $dir
 fi
 
 if [ $stage -le 4 ]; then
     # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
     if [ ! -f rirs_noises.zip ]; then
-	wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
-	unzip rirs_noises.zip
+      wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+      unzip rirs_noises.zip
     fi
 
     rvb_opts=()
@@ -131,7 +128,6 @@ if [ $stage -le 4 ]; then
 
     foreground_snrs="20:10:15:5:0"
     background_snrs="20:10:15:5:0"
-    num_data_reps=1
     # corrupt the data to generate multi-condition data
     # for data_dir in train dev test; do
     python steps/data/reverberate_data_dir.py \
@@ -147,70 +143,70 @@ if [ $stage -le 4 ]; then
 	   --source-sampling-rate 8000 \
 	   $whole_data_dir $rvb_data_dir
 
-    rvb_dirs=()
+    rvb_targets_dirs=()
     for i in `seq 1 $num_data_reps`; do
       steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
-        exp/segmentation_1a/train_whole_combined_targets_sub3 \
-        exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i || exit 1;
-      rvb_dirs+=(exp/segmentation_1a/train_whole_combined_targets_sub3_temp_$i)
+        $targets_dir ${targets_dir}_temp_$i || exit 1
+      rvb_targets_dirs+=(${targets_dir}_temp_$i)
     done
 
     steps/segmentation/combine_targets_dirs.sh \
-      $rvb_data_dir exp/segmentation_1a/train_whole_combined_targets_sub3_rvb \
-      $rvb_dirs || exit 1;
+      $rvb_data_dir ${rvb_targets_dir} \
+      ${rvb_targets_dirs[@]} || exit 1;
+
+    rm -r ${rvb_targets_dirs[@]}
 fi
 
 if [ $stage -le 5 ]; then
-  utils/copy_data_dir.sh ${rvb_data_dir} ${rvb_data_dir}_hires
   steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 80 \
-    ${rvb_data_dir}_hires
-  steps/compute_cmvn_stats.sh ${rvb_data_dir}_hires
+    ${rvb_data_dir}
+  steps/compute_cmvn_stats.sh ${rvb_data_dir}
 fi
 
 if [ $stage -le 6 ]; then
   # Train a STATS-pooling network for SAD
-    local/segmentation/tuning/train_stats_asr_sad_1a.sh \
+  local/segmentation/tuning/train_stats_asr_sad_1a.sh \
     --stage $nstage --train-stage $train_stage \
-    --targets-dir exp/segmentation_1a/train_whole_combined_targets_sub3_rvb \
-    --data-dir ${rvb_data_dir}_hires
+    --targets-dir ${rvb_targets_dir} \
+    --data-dir ${rvb_data_dir} --affix "1a" || exit 1
 fi
 
 if [ $stage -le 7 ]; then
   # The options to this script must match the options used in the 
   # nnet training script. 
-  # e.g. extra-left-context is 70, because the model is an LSTM trained with a 
-  # chunk-left-context of 60. 
+  # e.g. extra-left-context is 79, because the model is an stats pooling network 
+  # trained with a chunk-left-context of 79 and chunk-right-context of 21. 
   # Note: frames-per-chunk is 150 even though the model was trained with 
   # chunk-width of 20. This is just for speed.
   # See the script for details of the options.
   steps/segmentation/detect_speech_activity.sh \
-    --extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 \
+    --extra-left-context 79 --extra-right-context 21 --frames-per-chunk 150 \
     --extra-left-context-initial 0 --extra-right-context-final 0 \
     --nj 32 --acwt 0.3 --stage $test_stage \
     data/eval2000 \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a \
+    exp/segmentation${affix}/tdnn_stats_asr_sad_1a \
     mfcc_hires \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a/{,eval2000}
+    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/{,eval2000}
 fi
 
 if [ $stage -le 8 ]; then
   # Do some diagnostics
   steps/segmentation/evaluate_segmentation.pl data/eval2000/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/segments &> \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/evalutate_segmentation.log
+    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/segments &> \
+    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/evalutate_segmentation.log
   
   steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/utt2spk \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/segments \
-    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/sys.rttm
+    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/utt2spk \
+    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/segments \
+    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/sys.rttm
 
 #  export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
 #  md-eval.pl -c 0.25 -r $eval2000_rttm_file \
-#    -s exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/sys.rttm > \
-#    exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg/md_eval.log
+#    -s exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/sys.rttm > \
+#    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/md_eval.log
 fi
 
 if [ $stage -le 9 ]; then
-  utils/copy_data_dir.sh exp/segmentation_1a/tdnn_stats_asr_sad_1a/eval2000_seg \
+  utils/copy_data_dir.sh exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg \
     data/eval2000.seg_asr_sad_1a
 fi

From a1224eeea978174c46dfd4c57d9c8a122dbf4d49 Mon Sep 17 00:00:00 2001
From: Vimal Manohar <vimal.manohar91@gmail.com>
Date: Mon, 5 Feb 2018 00:37:07 -0500
Subject: [PATCH 08/11] combine_targets_dirs.sh: Fix name and argument order of targets-dir validation call

---
 egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh b/egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh
index f6be21e16f3..8135d089f5b 100755
--- a/egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh
+++ b/egs/wsj/s5/steps/segmentation/combine_targets_dirs.sh
@@ -49,7 +49,7 @@ for d in $*; do
   cat $d/targets.scp
 done | sort -k1,1 > $dest/targets.scp || exit 1
 
-steps/segmentation/verify_targets_dir.sh $data $dest || exit 1
+steps/segmentation/validate_targets_dir.sh $dest $data || exit 1
 
 echo "Combined targets and stored in $dest"
 exit 0

From e5a454a1ac1fc10ce2a812c68a20e91036ac423e Mon Sep 17 00:00:00 2001
From: Vimal Manohar <vimal.manohar91@gmail.com>
Date: Mon, 5 Feb 2018 09:30:36 -0500
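Subject: [PATCH 09/11] swbd run_asr_segmentation.sh: Minor bug fixes

- Rename the phone-suffix loop variable from "affix" to "a" so that the
  loops no longer overwrite the $affix option used in directory names.
- Derive the targets and MFCC log directory names from the basename of
  $data_dir instead of hard-coding "train".
- Move copying/combining of the targets directories into its own stage
  after feature extraction, and renumber the later stages accordingly.
- Run utils/fix_data_dir.sh on the reverberated data directory.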

---
 egs/swbd/s5c/local/run_asr_segmentation.sh | 106 +++++++++++----------
 1 file changed, 55 insertions(+), 51 deletions(-)

diff --git a/egs/swbd/s5c/local/run_asr_segmentation.sh b/egs/swbd/s5c/local/run_asr_segmentation.sh
index 4bc43007aca..6d935616225 100755
--- a/egs/swbd/s5c/local/run_asr_segmentation.sh
+++ b/egs/swbd/s5c/local/run_asr_segmentation.sh
@@ -52,14 +52,14 @@ garbage_phones="lau spn"
 silence_phones="sil"
 
 for p in $garbage_phones; do 
-  for affix in "" "_B" "_E" "_I" "_S"; do
-    echo "$p$affix"
+  for a in "" "_B" "_E" "_I" "_S"; do
+    echo "$p$a"
   done
 done > $dir/garbage_phones.txt
 
 for p in $silence_phones; do 
-  for affix in "" "_B" "_E" "_I" "_S"; do
-    echo "$p$affix"
+  for a in "" "_B" "_E" "_I" "_S"; do
+    echo "$p$a"
   done
 done > $dir/silence_phones.txt
 
@@ -69,8 +69,9 @@ if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \
   exit 1
 fi
 
+data_id=$(basename $data_dir)
 whole_data_dir=${data_dir}_whole
-targets_dir=exp/segmentation${affix}/train_whole_combined_targets_sub3
+targets_dir=exp/segmentation${affix}/${data_id}_whole_combined_targets_sub3
 
 rvb_data_dir=${whole_data_dir}_rvb_hires
 rvb_targets_dir=${targets_dir}_rvb
@@ -84,8 +85,8 @@ fi
 ###############################################################################
 if [ $stage -le 1 ]; then
   steps/make_mfcc.sh --nj 50 --cmd "$train_cmd"  --write-utt2num-frames true \
-    $whole_data_dir exp/make_mfcc/train_whole
-  steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/train_whole
+    $whole_data_dir exp/make_mfcc/${data_id}_whole
+  steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/${data_id}_whole
   utils/fix_data_dir.sh $whole_data_dir
 fi
 
@@ -114,56 +115,59 @@ if [ $stage -le 3 ]; then
 fi
 
 if [ $stage -le 4 ]; then
-    # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
-    if [ ! -f rirs_noises.zip ]; then
-      wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
-      unzip rirs_noises.zip
-    fi
-
-    rvb_opts=()
-    # This is the config for the system using simulated RIRs and point-source noises
-    rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
-    rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
-    rvb_opts+=(--noise-set-parameters RIRS_NOISES/pointsource_noises/noise_list)
-
-    foreground_snrs="20:10:15:5:0"
-    background_snrs="20:10:15:5:0"
-    # corrupt the data to generate multi-condition data
-    # for data_dir in train dev test; do
-    python steps/data/reverberate_data_dir.py \
-	   "${rvb_opts[@]}" \
-	   --prefix "rev" \
-	   --foreground-snrs $foreground_snrs \
-	   --background-snrs $background_snrs \
-	   --speech-rvb-probability 0.5 \
-	   --pointsource-noise-addition-probability 0.5 \
-	   --isotropic-noise-addition-probability 0.7 \
-	   --num-replications $num_data_reps \
-	   --max-noises-per-minute 4 \
-	   --source-sampling-rate 8000 \
-	   $whole_data_dir $rvb_data_dir
-
-    rvb_targets_dirs=()
-    for i in `seq 1 $num_data_reps`; do
-      steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
-        $targets_dir ${targets_dir}_temp_$i || exit 1
-      rvb_targets_dirs+=(${targets_dir}_temp_$i)
-    done
-
-    steps/segmentation/combine_targets_dirs.sh \
-      $rvb_data_dir ${rvb_targets_dir} \
-      ${rvb_targets_dirs[@]} || exit 1;
-
-    rm -r ${rvb_targets_dirs[@]}
+  # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+  if [ ! -f rirs_noises.zip ]; then
+    wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+    unzip rirs_noises.zip
+  fi
+
+  rvb_opts=()
+  # This is the config for the system using simulated RIRs and point-source noises
+  rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+  rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+  rvb_opts+=(--noise-set-parameters RIRS_NOISES/pointsource_noises/noise_list)
+
+  foreground_snrs="20:10:15:5:0"
+  background_snrs="20:10:15:5:0"
+  # Corrupt the whole-recording data with reverberation and additive noise
+  # to generate multi-condition training data.
+  python steps/data/reverberate_data_dir.py \
+    "${rvb_opts[@]}" \
+    --prefix "rev" \
+    --foreground-snrs $foreground_snrs \
+    --background-snrs $background_snrs \
+    --speech-rvb-probability 0.5 \
+    --pointsource-noise-addition-probability 0.5 \
+    --isotropic-noise-addition-probability 0.7 \
+    --num-replications $num_data_reps \
+    --max-noises-per-minute 4 \
+    --source-sampling-rate 8000 \
+    $whole_data_dir $rvb_data_dir
 fi
 
 if [ $stage -le 5 ]; then
   steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 80 \
     ${rvb_data_dir}
   steps/compute_cmvn_stats.sh ${rvb_data_dir}
+  utils/fix_data_dir.sh $rvb_data_dir
 fi
 
 if [ $stage -le 6 ]; then
+  rvb_targets_dirs=()   # temporary per-replica targets dirs; removed below
+  for i in $(seq 1 "$num_data_reps"); do
+    steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
+      "$targets_dir" "${targets_dir}_temp_$i" || exit 1
+    rvb_targets_dirs+=("${targets_dir}_temp_$i")
+  done
+
+  steps/segmentation/combine_targets_dirs.sh \
+    "$rvb_data_dir" "${rvb_targets_dir}" \
+    "${rvb_targets_dirs[@]}" || exit 1
+
+  rm -r -- "${rvb_targets_dirs[@]}"
+fi
+
+if [ $stage -le 7 ]; then
   # Train a STATS-pooling network for SAD
   local/segmentation/tuning/train_stats_asr_sad_1a.sh \
     --stage $nstage --train-stage $train_stage \
@@ -171,7 +175,7 @@ if [ $stage -le 6 ]; then
     --data-dir ${rvb_data_dir} --affix "1a" || exit 1
 fi
 
-if [ $stage -le 7 ]; then
+if [ $stage -le 8 ]; then
   # The options to this script must match the options used in the 
   # nnet training script. 
   # e.g. extra-left-context is 79, because the model is an stats pooling network 
@@ -189,7 +193,7 @@ if [ $stage -le 7 ]; then
     exp/segmentation${affix}/tdnn_stats_asr_sad_1a/{,eval2000}
 fi
 
-if [ $stage -le 8 ]; then
+if [ $stage -le 9 ]; then
   # Do some diagnostics
   steps/segmentation/evaluate_segmentation.pl data/eval2000/segments \
     exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/segments &> \
@@ -206,7 +210,7 @@ if [ $stage -le 8 ]; then
 #    exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg/md_eval.log
 fi
 
-if [ $stage -le 9 ]; then
+if [ $stage -le 10 ]; then
   utils/copy_data_dir.sh exp/segmentation${affix}/tdnn_stats_asr_sad_1a/eval2000_seg \
     data/eval2000.seg_asr_sad_1a
 fi

From fb19685f72364d70afdddff6d35b440e29efc09d Mon Sep 17 00:00:00 2001
From: Vimal Manohar <vimal.manohar91@gmail.com>
Date: Mon, 5 Feb 2018 15:50:08 -0500
Subject: [PATCH 10/11] Making the split per-spk instead of per-utt

---
 egs/wsj/s5/steps/cleanup/lattice_oracle_align.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/wsj/s5/steps/cleanup/lattice_oracle_align.sh b/egs/wsj/s5/steps/cleanup/lattice_oracle_align.sh
index 29d52588807..d47daac1bc0 100755
--- a/egs/wsj/s5/steps/cleanup/lattice_oracle_align.sh
+++ b/egs/wsj/s5/steps/cleanup/lattice_oracle_align.sh
@@ -72,9 +72,9 @@ fi
 nj=$(cat $latdir/num_jobs)
 oov=$(cat $lang/oov.int)
 
-utils/split_data.sh --per-utt $data $nj
+utils/split_data.sh $data $nj
 
-sdata=$data/split${nj}utt
+sdata=$data/split${nj}
 
 if [ $stage -le 1 ]; then
   $cmd JOB=1:$nj $dir/log/get_oracle.JOB.log \

From b539e3d2972a7a6012d2f9d8290e321297e08c2e Mon Sep 17 00:00:00 2001
From: Vimal Manohar <vimal.manohar91@gmail.com>
Date: Tue, 6 Feb 2018 15:14:49 -0500
Subject: [PATCH 11/11] swbd_sad: Use $train_cmd to run the get_priors job

---
 .../s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 96009c69374..05e5f4ded05 100755
--- a/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/swbd/s5c/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -136,7 +136,7 @@ fi
 
 if [ $stage -le 7 ]; then
   # Use a subset to compute prior over the output targets
-  $cmd $dir/log/get_priors.log \
+  $train_cmd $dir/log/get_priors.log \
     matrix-sum-rows "scp:utils/subset_scp.pl --quiet 1000 $targets_dir/targets.scp |" \
     ark:- \| vector-sum --binary=false ark:- $dir/post_output.vec || exit 1