diff --git a/egs/sre08/v1/sid/compute_vad_decision.sh b/egs/sre08/v1/sid/compute_vad_decision.sh deleted file mode 100755 index 7099d063c7f..00000000000 --- a/egs/sre08/v1/sid/compute_vad_decision.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Daniel Povey -# Apache 2.0 -# To be run from .. (one directory up from here) -# see ../run.sh for example - -# Compute energy based VAD output -# We do this in just one job; it's fast. -# - -nj=2 -cmd=run.pl -vad_config=conf/vad.conf - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - -if [ $# != 3 ]; then - echo "Usage: $0 [options] "; - echo "e.g.: $0 data/train exp/make_vad mfcc" - echo " Options:" - echo " --vad-config # config passed to compute-vad-energy" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - exit 1; -fi - -data=$1 -logdir=$2 -vaddir=$3 - -# make $vaddir an absolute pathname. -vaddir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $vaddir ${PWD}` - -# use "name" as part of name of the archive. -name=`basename $data` - -mkdir -p $vaddir || exit 1; -mkdir -p $logdir || exit 1; - - -for f in $data/feats.scp "$vad_config"; do - if [ ! 
-f $f ]; then - echo "compute_vad_decision.sh: no such file $f" - exit 1; - fi -done - -utils/split_data.sh $data $nj || exit 1; -sdata=$data/split$nj; - -$cmd JOB=1:$nj $logdir/vad_${name}.JOB.log \ - compute-vad --config=$vad_config scp:$sdata/JOB/feats.scp ark,scp:$vaddir/vad_${name}.JOB.ark,$vaddir/vad_${name}.JOB.scp \ - || exit 1; - -for ((n=1; n<=nj; n++)); do - cat $vaddir/vad_${name}.$n.scp || exit 1; -done > $data/vad.scp - -nc=`cat $data/vad.scp | wc -l` -nu=`cat $data/feats.scp | wc -l` -if [ $nc -ne $nu ]; then - echo "**Warning it seems not all of the speakers got VAD output ($nc != $nu);" - echo "**validate_data_dir.sh will fail; you might want to use fix_data_dir.sh" - [ $nc -eq 0 ] && exit 1; -fi - - -echo "Created VAD output for $name" diff --git a/egs/sre08/v1/sid/compute_vad_decision.sh b/egs/sre08/v1/sid/compute_vad_decision.sh new file mode 120000 index 00000000000..174321b847e --- /dev/null +++ b/egs/sre08/v1/sid/compute_vad_decision.sh @@ -0,0 +1 @@ +../steps/compute_vad_decision.sh \ No newline at end of file diff --git a/egs/wsj/s5/steps/cleanup/clean_and_segment_data.sh b/egs/wsj/s5/steps/cleanup/clean_and_segment_data.sh index 670e6c2b714..9bf354b7891 100755 --- a/egs/wsj/s5/steps/cleanup/clean_and_segment_data.sh +++ b/egs/wsj/s5/steps/cleanup/clean_and_segment_data.sh @@ -6,9 +6,9 @@ # This script demonstrates how to re-segment training data selecting only the # "good" audio that matches the transcripts. -# The basic idea is to decode with an existing in-domain acoustic model, and a -# biased language model built from the reference, and then work out the -# segmentation from a ctm like file. +# The basic idea is to decode with an existing in-domain GMM acoustic model, and +# a biased language model built from the reference transcript, and then work out +# the segmentation from a ctm like file. 
set -e -o pipefail diff --git a/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh new file mode 100755 index 00000000000..306d6d3647a --- /dev/null +++ b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# 2016 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +# This script demonstrates how to re-segment training data selecting only the +# "good" audio that matches the transcripts. +# This script is like clean_and_segment_data.sh, but uses nnet3 model instead of +# a GMM for decoding. +# The basic idea is to decode with an existing in-domain nnet3 acoustic model, +# and a biased language model built from the reference transcript, and then work +# out the segmentation from a ctm like file. + +set -e +set -o pipefail +set -u + +stage=0 + +cmd=run.pl +cleanup=true # remove temporary directories and files +nj=4 +# Decode options +graph_opts= +beam=15.0 +lattice_beam=1.0 + +# Contexts must ideally match training +extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) +extra_right_context=0 +extra_left_context_initial=-1 +extra_right_context_final=-1 +frames_per_chunk=150 + +# i-vector options +extractor= # i-Vector extractor. If provided, will extract i-vectors. + # Required if the network was trained with i-vector extractor. +use_vad= # Use energy-based VAD for i-vector extraction + +segmentation_opts= + +. ./path.sh +. utils/parse_options.sh + + +if [ $# -ne 5 ]; then + cat <] [options] + This script does data cleanup to remove bad portions of transcripts and + may do other minor modifications of transcripts such as allowing repetitions + for disfluencies, and adding or removing non-scored words (by default: + words that map to 'silence phones') + Note: is expected to contain a nnet3-based model. 
+ and decoding options like --extra-left-context must match + the appropriate options used for training. + + e.g. $0 data/train data/lang exp/tri3 exp/tri3_cleanup data/train_cleaned + main options (for others, see top of script file): + --stage # stage to run from, to enable resuming from partially + # completed run (default: 0) + --cmd '$cmd' # command to submit jobs with (e.g. run.pl, queue.pl) + --nj # number of parallel jobs to use in graph creation and + # decoding + --graph-opts 'opts' # Additional options to make_biased_lm_graphs.sh. + # Please run steps/cleanup/make_biased_lm_graphs.sh + # without arguments to see allowed options. + --segmentation-opts 'opts' # Additional options to segment_ctm_edits.py. + # Please run steps/cleanup/internal/segment_ctm_edits.py + # without arguments to see allowed options. + --cleanup # Clean up intermediate files afterward. Default true. + --extractor # i-vector extractor directory if i-vector is + # to be used during decoding. Must match + # the extractor used for training neural-network. + --use-vad # If true, uses energy-based VAD to apply frame weights + # for i-vector stats extraction +EOF + exit 1 +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 +data_out=$5 + + +extra_files= +if [ ! -z "$extractor" ]; then + extra_files="$extractor/final.ie" +fi + +for f in $srcdir/{final.mdl,tree,cmvn_opts} $data/utt2spk $data/feats.scp \ + $lang/words.txt $lang/oov.txt $extra_files; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist." + exit 1 + fi +done + +mkdir -p $dir +cp $srcdir/final.mdl $dir +cp $srcdir/tree $dir +cp $srcdir/cmvn_opts $dir +cp $srcdir/{splice_opts,delta_opts,final.mat,final.alimdl} $dir 2>/dev/null || true +cp $srcdir/frame_subsampling_factor $dir 2>/dev/null || true + +utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt +cp $lang/phones.txt $dir + +if [ $stage -le 1 ]; then + echo "$0: Building biased-language-model decoding graphs..." 
+ + + steps/cleanup/make_biased_lm_graphs.sh $graph_opts \ + --nj $nj --cmd "$cmd" \ + $data $lang $dir $dir/graphs +fi + +online_ivector_dir= +if [ ! -z "$extractor" ]; then + online_ivector_dir=$dir/ivectors_$(basename $data) + + if [ $stage -le 2 ]; then + # Compute energy-based VAD + if $use_vad; then + steps/compute_vad_decision.sh $data \ + $data/log $data/data + fi + + steps/online/nnet2/extract_ivectors_online.sh \ + --nj $nj --cmd "$cmd --mem 4G" --use-vad $use_vad \ + $data $extractor $online_ivector_dir + fi +fi + +if [ $stage -le 3 ]; then + echo "$0: Decoding with biased language models..." + + steps/cleanup/decode_segmentation_nnet3.sh \ + --beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \ + --skip-scoring true --allow-partial false \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk $frames_per_chunk \ + ${online_ivector_dir:+--online-ivector-dir $online_ivector_dir} \ + $dir/graphs $data $dir/lats + + # the following is for diagnostics, e.g. it will give us the lattice depth. + steps/diagnostic/analyze_lats.sh --cmd "$cmd" $lang $dir/lats +fi + +frame_shift_opt= +if [ -f $srcdir/frame_subsampling_factor ]; then + frame_shift_opt="--frame-shift=0.0$(cat $srcdir/frame_subsampling_factor)" +fi + +if [ $stage -le 4 ]; then + echo "$0: Doing oracle alignment of lattices..." + steps/cleanup/lattice_oracle_align.sh --cmd "$cmd --mem 4G" $frame_shift_opt \ + $data $lang $dir/lats $dir/lattice_oracle +fi + + +if [ $stage -le 4 ]; then + echo "$0: using default values of non-scored words..." 
+ + # At the level of this script we just hard-code it that non-scored words are + # those that map to silence phones (which is what get_non_scored_words.py + # gives us), although this could easily be made user-configurable. This list + # of non-scored words affects the behavior of several of the data-cleanup + # scripts; essentially, we view the non-scored words as negotiable when it + # comes to the reference transcript, so we'll consider changing the reference + # to match the hyp when it comes to these words. + steps/cleanup/internal/get_non_scored_words.py $lang > $dir/non_scored_words.txt +fi + +if [ $stage -le 5 ]; then + echo "$0: modifying ctm-edits file to allow repetitions [for dysfluencies] and " + echo " ... to fix reference mismatches involving non-scored words. " + + $cmd $dir/log/modify_ctm_edits.log \ + steps/cleanup/internal/modify_ctm_edits.py --verbose=3 $dir/non_scored_words.txt \ + $dir/lattice_oracle/ctm_edits $dir/ctm_edits.modified + + echo " ... See $dir/log/modify_ctm_edits.log for details and stats, including" + echo " a list of commonly-repeated words." +fi + +if [ $stage -le 6 ]; then + echo "$0: applying 'taint' markers to ctm-edits file to mark silences and" + echo " ... non-scored words that are next to errors." + $cmd $dir/log/taint_ctm_edits.log \ + steps/cleanup/internal/taint_ctm_edits.py $dir/ctm_edits.modified $dir/ctm_edits.tainted + echo "... Stats, including global cor/ins/del/sub stats, are in $dir/log/taint_ctm_edits.log." +fi + + +if [ $stage -le 7 ]; then + echo "$0: creating segmentation from ctm-edits file." 
+ + $cmd $dir/log/segment_ctm_edits.log \ + steps/cleanup/internal/segment_ctm_edits.py \ + $segmentation_opts \ + --oov-symbol-file=$lang/oov.txt \ + --ctm-edits-out=$dir/ctm_edits.segmented \ + --word-stats-out=$dir/word_stats.txt \ + $dir/non_scored_words.txt \ + $dir/ctm_edits.tainted $dir/text $dir/segments + + echo "$0: contents of $dir/log/segment_ctm_edits.log are:" + cat $dir/log/segment_ctm_edits.log + echo "For word-level statistics on p(not-being-in-a-segment), with 'worst' words at the top," + echo "see $dir/word_stats.txt" + echo "For detailed utterance-level debugging information, see $dir/ctm_edits.segmented" +fi + +if [ $stage -le 8 ]; then + echo "$0: working out required segment padding to account for feature-generation edge effects." + # make sure $data/utt2dur exists. + utils/data/get_utt2dur.sh $data + # utt2dur.from_ctm contains lines of the form 'utt dur', e.g. + # AMI_EN2001a_H00_MEE068_0000557_0000594 0.35 + # where the times are ultimately derived from the num-frames in the features. + cat $dir/lattice_oracle/ctm_edits | \ + awk '{utt=$1; t=$3+$4; if (t > dur[$1]) dur[$1] = t; } END{for (k in dur) print k, dur[k];}' | \ + sort > $dir/utt2dur.from_ctm + # the apply_map command below gives us lines of the form 'utt dur-from-$data/utt2dur dur-from-utt2dur.from_ctm', + # e.g. AMI_EN2001a_H00_MEE068_0000557_0000594 0.37 0.35 + utils/apply_map.pl -f 1 <(awk '{print $1,$1,$2}' <$data/utt2dur) <$dir/utt2dur.from_ctm | \ + awk '{printf("%.3f\n", $2 - $3); }' | sort | uniq -c > $dir/padding_frequencies + # there are values other than the most-frequent one (0.02) in there because + # of wav files that were shorter than the segment info. 
+ padding=$(head -n 1 $dir/padding_frequencies | awk '{print $2}') + echo "$0: we'll pad segments with $padding seconds at segment ends to correct for feature-generation end effects" + echo $padding >$dir/segment_end_padding +fi + + +if [ $stage -le 8 ]; then + echo "$0: based on the segments and text file in $dir/segments and $dir/text, creating new data-dir in $data_out" + padding=$(cat $dir/segment_end_padding) # e.g. 0.02 + utils/data/subsegment_data_dir.sh --segment-end-padding $padding ${data} $dir/segments $dir/text $data_out + # utils/data/subsegment_data_dir.sh can output directories that have e.g. too many entries left in wav.scp + # Clean this up with the fix_data_dir.sh script + utils/fix_data_dir.sh $data_out +fi + +if [ $stage -le 9 ]; then + echo "$0: recomputing CMVN stats for the new data" + # Caution: this script puts the CMVN stats in $data_out/data, + # e.g. data/train_cleaned/data. This is not the general pattern we use. + steps/compute_cmvn_stats.sh $data_out $data_out/log $data_out/data +fi + +if $cleanup; then + echo "$0: cleaning up intermediate files" + rm -r $dir/fsts $dir/HCLG.fsts.scp || true + rm -r $dir/lats/lat.*.gz $dir/lats/split_fsts || true + rm $dir/lattice_oracle/lat.*.gz || true +fi + +echo "$0: done." diff --git a/egs/wsj/s5/steps/cleanup/decode_segmentation_nnet3.sh b/egs/wsj/s5/steps/cleanup/decode_segmentation_nnet3.sh new file mode 100755 index 00000000000..02a9d87d26b --- /dev/null +++ b/egs/wsj/s5/steps/cleanup/decode_segmentation_nnet3.sh @@ -0,0 +1,174 @@ +#!/bin/bash + +# Copyright 2014 Guoguo Chen, 2015 GoVivace Inc. (Nagendra Goel) + # 2017 Vimal Manohar + # Apache 2.0 + +# This script is similar to steps/cleanup/decode_segmentation.sh, but +# does decoding using nnet3 model. + +set -e +set -o pipefail + +# Begin configuration section. +stage=-1 +nj=4 # number of decoding jobs. +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. 
+post_decode_acwt=1.0 # can be used in 'chain' systems to scale acoustics by 10 so the + # regular scoring script works. +cmd=run.pl +beam=15.0 +frames_per_chunk=50 +max_active=7000 +min_active=200 +ivector_scale=1.0 +lattice_beam=8.0 # Beam we use in lattice generation. We can reduce this if + # we only need the best path +iter=final +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +scoring_opts= +skip_scoring=false +allow_partial=true +extra_left_context=0 +extra_right_context=0 +extra_left_context_initial=-1 +extra_right_context_final=-1 +online_ivector_dir= +minimize=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. utils/parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "$0: This is a special decoding script for segmentation where we" + echo "use one decoding graph per segment. We assume a file HCLG.fsts.scp exists" + echo "which is the scp file of the graphs for each segment." + echo "This will normally be obtained by steps/cleanup/make_biased_lm_graphs.sh." + echo "" + echo "Usage: $0 [options] " + echo " e.g.: $0 --online-ivector-dir exp/nnet3/ivectors_train_si284_split " + echo " exp/nnet3/tdnn/graph_train_si284_split \\" + echo " data/train_si284_split exp/nnet3/tdnn/decode_train_si284_split" + echo "" + echo "where is assumed to be a sub-directory of the directory" + echo "where the model is." + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --acwt # acoustic scale used for lattice generation " + echo " --scoring-opts # options to local/score.sh" + echo " --num-threads # number of threads to use, default 1." 
+ exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 + +mkdir -p $dir/log + +if [ -e $dir/$iter.mdl ]; then + srcdir=$dir +elif [ -e $dir/../$iter.mdl ]; then + srcdir=$(dirname $dir) +else + echo "$0: expected either $dir/$iter.mdl or $dir/../$iter.mdl to exist" + exit 1 +fi +model=$srcdir/$iter.mdl + + +extra_files= +if [ ! -z "$online_ivector_dir" ]; then + steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1 + extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period" +fi + +utils/lang/check_phones_compatible.sh $graphdir/phones.txt $srcdir/phones.txt || exit 1 + +for f in $graphdir/HCLG.fsts.scp $data/feats.scp $model $extra_files; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +sdata=$data/split$nj; +cmvn_opts=`cat $srcdir/cmvn_opts` || exit 1; +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +# Split HCLG.fsts.scp by input utterance +n1=$(cat $graphdir/HCLG.fsts.scp | wc -l) +n2=$(cat $data/feats.scp | wc -l) +if [ $n1 != $n2 ]; then + echo "$0: expected $n2 graphs in $graphdir/HCLG.fsts.scp, got $n1" +fi + +mkdir -p $dir/split_fsts +sort -k1,1 $graphdir/HCLG.fsts.scp > $dir/HCLG.fsts.sorted.scp +utils/filter_scps.pl --no-warn -f 1 JOB=1:$nj \ + $sdata/JOB/feats.scp $dir/HCLG.fsts.sorted.scp $dir/split_fsts/HCLG.fsts.JOB.scp +HCLG=scp:$dir/split_fsts/HCLG.fsts.JOB.scp + +## Set up features. +echo "$0: feature type is raw" + +feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" + +if [ ! 
-z "$online_ivector_dir" ]; then + ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" +fi + +if [ "$post_decode_acwt" == 1.0 ]; then + lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz" +else + lat_wspecifier="ark:|lattice-scale --acoustic-scale=$post_decode_acwt ark:- ark:- | gzip -c >$dir/lat.JOB.gz" +fi + +frame_subsampling_opt= +if [ -f $srcdir/frame_subsampling_factor ]; then + # e.g. for 'chain' systems + frame_subsampling_opt="--frame-subsampling-factor=$(cat $srcdir/frame_subsampling_factor)" +fi + +if [ $stage -le 1 ]; then + if [ -f "$graphdir/num_pdfs" ]; then + [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $model | grep pdfs | awk '{print $NF}'` ] || \ + { echo "Mismatch in number of pdfs with $model"; exit 1; } + fi + $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \ + nnet3-latgen-faster$thread_string $ivector_opts $frame_subsampling_opt \ + --frames-per-chunk=$frames_per_chunk \ + --extra-left-context=$extra_left_context \ + --extra-right-context=$extra_right_context \ + --extra-left-context-initial=$extra_left_context_initial \ + --extra-right-context-final=$extra_right_context_final \ + --minimize=$minimize --max-active=$max_active --min-active=$min_active --beam=$beam \ + --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=$allow_partial \ + --word-symbol-table=$graphdir/words.txt "$model" \ + "$HCLG" "$feats" "$lat_wspecifier" || exit 1; +fi + + +if [ $stage -le 2 ]; then + if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "$0: Not scoring because local/score.sh does not exist or not executable." && exit 1; + iter_opt= + [ "$iter" != "final" ] && iter_opt="--iter $iter" + local/score.sh $iter_opt $scoring_opts --cmd "$cmd" $data $graphdir $dir || + { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; } + fi +fi +echo "Decoding done." 
+exit 0; diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh index 16350fdb032..c7e50ea165e 100755 --- a/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh +++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances.sh @@ -4,6 +4,23 @@ # 2016 Vimal Manohar # Apache 2.0 +# This script performs segmentation of the input data based on the transcription +# and outputs segmented data along with the corresponding aligned transcription. +# The purpose of this script is to divide up the input data (which may consist +# of long recordings such as television shows or audiobooks) into segments which +# are of manageable length for further processing, along with the portion of the +# transcript that seems to match (aligns with) each segment. +# This is for the light-supervised training scenario where the input transcription is +# not expected to be completely clean and may have significant errors. +# See "JHU Kaldi System for Arabic MGB-3 ASR Challenge using Diarization, +# Audio-transcript Alignment and Transfer Learning": Vimal Manohar, Daniel +# Povey, Sanjeev Khudanpur, ASRU 2017 +# (http://www.danielpovey.com/files/2017_asru_mgb3.pdf) for details. +# The output data is not necessarily particularly clean; you can run +# steps/cleanup/clean_and_segment_data.sh on the output in order to +# further clean it and eliminate data where the transcript doesn't seem to +# match. + . 
./path.sh set -e @@ -380,7 +397,8 @@ if [ $stage -le 9 ]; then fi if [ $stage -le 10 ]; then - steps/cleanup/internal/resolve_ctm_edits_overlaps.py \ + $cmd $dir/log/resolve_ctm_edits.log \ + steps/cleanup/internal/resolve_ctm_edits_overlaps.py \ ${data_uniform_seg}/segments $decode_dir/ctm_$lmwt/ctm_edits $dir/ctm_edits fi diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh new file mode 100755 index 00000000000..d21b94fc5fb --- /dev/null +++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh @@ -0,0 +1,523 @@ +#!/bin/bash + +# Copyright 2014 Guoguo Chen + # 2016 Vimal Manohar + # Apache 2.0 + +# This script is similar to steps/cleanup/segment_long_utterances.sh, but +# uses nnet3 acoustic model instead of GMM acoustic model for decoding. +# This script performs segmentation of the input data based on the transcription +# and outputs segmented data along with the corresponding aligned transcription. +# The purpose of this script is to divide up the input data (which may consist +# of long recordings such as television shows or audiobooks) into segments which +# are of manageable length for further processing, along with the portion of the +# transcript that seems to match (aligns with) each segment. +# This is for the light-supervised training scenario where the input transcription is +# not expected to be completely clean and may have significant errors. +# See "JHU Kaldi System for Arabic MGB-3 ASR Challenge using Diarization, +# Audio-transcript Alignment and Transfer Learning": Vimal Manohar, Daniel +# Povey, Sanjeev Khudanpur, ASRU 2017 +# (http://www.danielpovey.com/files/2017_asru_mgb3.pdf) for details. +# The output data is not necessarily particularly clean; you can run +# steps/cleanup/clean_and_segment_data_nnet3.sh on the output in order to +# further clean it and eliminate data where the transcript doesn't seem to +# match. 
+ + +set -e +set -o pipefail +set -u + +stage=-1 +cmd=run.pl +nj=4 + +# Uniform segmentation options +max_segment_duration=30 +overlap_duration=5 +seconds_per_spk_max=30 + +# Decode options +graph_opts= +beam=15.0 +lattice_beam=1.0 +lmwt=10 + +# Contexts must ideally match training +extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) +extra_right_context=0 +extra_left_context_initial=-1 +extra_right_context_final=-1 +frames_per_chunk=150 + +# i-vector options +extractor= # i-Vector extractor. If provided, will extract i-vectors. + # Required if the network was trained with i-vector extractor. +use_vad= # Use energy-based VAD for i-vector extraction + +# TF-IDF similarity search options +max_words=1000 +num_neighbors_to_search=1 # Number of neighboring documents to search around the one retrieved based on maximum tf-idf similarity. +neighbor_tfidf_threshold=0.5 + +align_full_hyp=false # Align full hypothesis i.e. trackback from the end to get the alignment. + +# First-pass segmentation opts +# These options are passed to the script +# steps/cleanup/internal/segment_ctm_edits_mild.py +segmentation_extra_opts= +min_split_point_duration=0.1 +max_deleted_words_kept_when_merging=1 +max_wer=50 +max_segment_length_for_merging=60 +max_bad_proportion=0.75 +max_intersegment_incorrect_words_length=1 +max_segment_length_for_splitting=10 +hard_max_segment_length=15 +min_silence_length_to_split_at=0.3 +min_non_scored_length_to_split_at=0.3 + + +. ./path.sh +. utils/parse_options.sh + +if [ $# -ne 5 ] && [ $# -ne 7 ]; then + cat <] [options] [ ] + e.g.: $0 exp/wsj_tri2b data/lang_nosp data/train_long data/train_long/text data/train_reseg exp/segment_wsj_long_utts_train +This script performs segmentation of the data in and writes out the +segmented data (with a segments file) to + along with the corresponding aligned transcription. 
+Note: If is not provided, the "text" file in is used as the +raw transcripts to train biased LM for the utterances. +If is provided, then it should be a mapping from the utterance-ids in + to the transcript-keys in the file , which will be +used to train biased LMs for the utterances. +The purpose of this script is to divide up the input data (which may consist of +long recordings such as television shows or audiobooks) into segments which are +of manageable length for further processing, along with the portion of the +transcript that seems to match each segment. +The output data is not necessarily particularly clean; you are advised to run +steps/cleanup/clean_and_segment_data.sh on the output in order to further clean +it and eliminate data where the transcript doesn't seem to match. + main options (for others, see top of script file): + --stage # stage to run from, to enable resuming from partially + # completed run (default: 0) + --cmd '$cmd' # command to submit jobs with (e.g. run.pl, queue.pl) + --nj # number of parallel jobs to use in graph creation and + # decoding + --graph-opts 'opts' # Additional options to make_biased_lm_graphs.sh. + # Please run steps/cleanup/make_biased_lm_graphs.sh + # without arguments to see allowed options. + --segmentation-extra-opts 'opts' # Additional options to segment_ctm_edits_mild.py. + # Please run steps/cleanup/internal/segment_ctm_edits_mild.py + # without arguments to see allowed options. + --align-full-hyp # If true, align full hypothesis + i.e. trackback from the end to get the alignment. + This is different from the normal + Smith-Waterman alignment, where the + traceback will be from the maximum score. + --extractor # i-vector extractor directory if i-vector is + # to be used during decoding. Must match + # the extractor used for training neural-network. 
+ --use-vad # If true, uses energy-based VAD to apply frame weights + # for i-vector stats extraction +EOF + exit 1 +fi + +srcdir=$1 +lang=$2 +data=$3 + +extra_files= +utt2text= +text=$data/text +if [ $# -eq 7 ]; then + text=$4 + utt2text=$5 + out_data=$6 + dir=$7 + extra_files="$utt2text" +else + out_data=$4 + dir=$5 +fi + +if [ ! -z "$extractor" ]; then + extra_files="$extra_files $extractor/final.ie" +fi + +for f in $data/feats.scp $text $extra_files $srcdir/tree \ + $srcdir/final.mdl $srcdir/cmvn_opts; do + if [ ! -f $f ]; then + echo "$0: Could not find file $f" + exit 1 + fi +done + +data_id=`basename $data` +mkdir -p $dir +cp $srcdir/final.mdl $dir +cp $srcdir/tree $dir +cp $srcdir/cmvn_opts $dir +cp $srcdir/{splice_opts,delta_opts,final.mat,final.alimdl} $dir 2>/dev/null || true +cp $srcdir/frame_subsampling_factor $dir 2>/dev/null || true + +utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt +cp $lang/phones.txt $dir + +data_uniform_seg=$dir/${data_id}_uniform_seg + +# First we split the data into segments of around 30s long, on which +# it would be possible to do a decoding. +# A diarization step will be added in the future. +if [ $stage -le 1 ]; then + echo "$0: Stage 1 (Splitting data directory $data into uniform segments)" + + utils/data/get_utt2dur.sh $data + if [ ! -f $data/segments ]; then + utils/data/get_segments_for_data.sh $data > $data/segments + fi + + utils/data/get_uniform_subsegments.py \ + --max-segment-duration=$max_segment_duration \ + --overlap-duration=$overlap_duration \ + --max-remaining-duration=$(perl -e "print $max_segment_duration / 2.0") \ + $data/segments > $dir/uniform_sub_segments +fi + +if [ $stage -le 2 ]; then + echo "$0: Stage 2 (Prepare uniform sub-segmented data directory)" + rm -r $data_uniform_seg || true + + if [ ! 
-z "$seconds_per_spk_max" ]; then + utils/data/subsegment_data_dir.sh \ + $data $dir/uniform_sub_segments $dir/${data_id}_uniform_seg.temp + + utils/data/modify_speaker_info.sh --seconds-per-spk-max $seconds_per_spk_max \ + $dir/${data_id}_uniform_seg.temp $data_uniform_seg + else + utils/data/subsegment_data_dir.sh \ + $data $dir/uniform_sub_segments $data_uniform_seg + fi + + utils/fix_data_dir.sh $data_uniform_seg + + # Compute new cmvn stats for the segmented data directory + steps/compute_cmvn_stats.sh $data_uniform_seg/ +fi + +graph_dir=$dir/graphs_uniform_seg + +if [ $stage -le 3 ]; then + echo "$0: Stage 3 (Building biased-language-model decoding graphs)" + + mkdir -p $graph_dir + + # Make graphs w.r.t. to the original text (usually recording-level) + steps/cleanup/make_biased_lm_graphs.sh $graph_opts \ + --nj $nj --cmd "$cmd" $text \ + $lang $dir $dir/graphs + if [ -z "$utt2text" ]; then + # and then copy it to the sub-segments. + cat $dir/uniform_sub_segments | awk '{print $1" "$2}' | \ + utils/apply_map.pl -f 2 $dir/graphs/HCLG.fsts.scp | \ + sort -k1,1 > \ + $graph_dir/HCLG.fsts.scp + else + # and then copy it to the sub-segments. + cat $dir/uniform_sub_segments | awk '{print $1" "$2}' | \ + utils/apply_map.pl -f 2 $utt2text | \ + utils/apply_map.pl -f 2 $dir/graphs/HCLG.fsts.scp | \ + sort -k1,1 > \ + $graph_dir/HCLG.fsts.scp + fi + + cp $lang/words.txt $graph_dir + cp -r $lang/phones $graph_dir + [ -f $dir/graphs/num_pdfs ] && cp $dir/graphs/num_pdfs $graph_dir/ +fi + +decode_dir=$dir/lats +mkdir -p $decode_dir + +online_ivector_dir= +if [ ! 
-z "$extractor" ]; then + online_ivector_dir=$dir/ivectors_$(basename $data_uniform_seg) + + if [ $stage -le 4 ]; then + # Compute energy-based VAD + if $use_vad; then + steps/compute_vad_decision.sh $data_uniform_seg \ + $data_uniform_seg/log $data_uniform_seg/data + fi + + steps/online/nnet2/extract_ivectors_online.sh \ + --nj $nj --cmd "$cmd --mem 4G" --use-vad $use_vad \ + $data_uniform_seg $extractor $online_ivector_dir + fi +fi + +if [ $stage -le 5 ]; then + echo "$0: Decoding with biased language models..." + + steps/cleanup/decode_segmentation_nnet3.sh \ + --beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \ + --skip-scoring true --allow-partial false \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk $frames_per_chunk \ + ${online_ivector_dir:+--online-ivector-dir $online_ivector_dir} \ + $graph_dir $data_uniform_seg $decode_dir +fi + +frame_shift_opt= +if [ -f $srcdir/frame_subsampling_factor ]; then + frame_shift_opt="--frame-shift=0.0$(cat $srcdir/frame_subsampling_factor)" +fi + +if [ $stage -le 6 ]; then + steps/get_ctm_fast.sh --lmwt $lmwt --cmd "$cmd --mem 4G" \ + --print-silence true $frame_shift_opt \ + $data_uniform_seg $lang $decode_dir $decode_dir/ctm_$lmwt +fi + +# Split the original text into documents, over which we can do +# searching reasonably efficiently. Also get a mapping from the original +# text to the created documents (i.e. text2doc) +# Since the Smith-Waterman alignment is linear in the length of the +# text, we want to keep it reasonably small (a few thousand words). + +if [ $stage -le 7 ]; then + # Split the reference text into documents. + mkdir -p $dir/docs + + # text2doc is a mapping from the original transcript to the documents + # it is split into. + # The format is + # ... 
+ steps/cleanup/internal/split_text_into_docs.pl --max-words $max_words \ + $text $dir/docs/doc2text $dir/docs/docs.txt + utils/utt2spk_to_spk2utt.pl $dir/docs/doc2text > $dir/docs/text2doc +fi + +if [ $stage -le 8 ]; then + # Get TF-IDF for the reference documents. + echo $nj > $dir/docs/num_jobs + + utils/split_data.sh $data_uniform_seg $nj + + mkdir -p $dir/docs/split$nj/ + + # First compute IDF stats + $cmd $dir/log/compute_source_idf_stats.log \ + steps/cleanup/internal/compute_tf_idf.py \ + --tf-weighting-scheme="raw" \ + --idf-weighting-scheme="log" \ + --output-idf-stats=$dir/docs/idf_stats.txt \ + $dir/docs/docs.txt $dir/docs/src_tf_idf.txt + + # Split documents so that they can be accessed easily by parallel jobs. + mkdir -p $dir/docs/split$nj/ + sdir=$dir/docs/split$nj + for n in `seq $nj`; do + + # old2new_utts is a mapping from the original segments to the + # new segments created by uniformly segmenting. + # The format is ... + utils/filter_scp.pl $data_uniform_seg/split$nj/$n/utt2spk $dir/uniform_sub_segments | \ + cut -d ' ' -f 1,2 | utils/utt2spk_to_spk2utt.pl > $sdir/old2new_utts.$n.txt + + if [ ! -z "$utt2text" ]; then + # utt2text, if provided, is a mapping from the to + # . + # Since text2doc is mapping from to documents, we + # first have to find the original-transcripts that are in the current + # split. + utils/filter_scp.pl $sdir/old2new_utts.$n.txt $utt2text | \ + cut -d ' ' -f 2 | sort -u | \ + utils/filter_scp.pl /dev/stdin $dir/docs/text2doc > $sdir/text2doc.$n + else + utils/filter_scp.pl $sdir/old2new_utts.$n.txt \ + $dir/docs/text2doc > $sdir/text2doc.$n + fi + + utils/spk2utt_to_utt2spk.pl $sdir/text2doc.$n | \ + utils/filter_scp.pl /dev/stdin $dir/docs/docs.txt > \ + $sdir/docs.$n.txt + done + + # Compute TF-IDF for the source documents. 
+ $cmd JOB=1:$nj $dir/docs/log/get_tfidf_for_source_texts.JOB.log \ + steps/cleanup/internal/compute_tf_idf.py \ + --tf-weighting-scheme="raw" \ + --idf-weighting-scheme="log" \ + --input-idf-stats=$dir/docs/idf_stats.txt \ + $sdir/docs.JOB.txt $sdir/src_tf_idf.JOB.txt + + sdir=$dir/docs/split$nj + # Make $sdir an absolute pathname. + sdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $sdir ${PWD}` + + for n in `seq $nj`; do + awk -v f="$sdir/src_tf_idf.$n.txt" '{print $1" "f}' \ + $sdir/text2doc.$n + done | perl -ane 'BEGIN { %tfidfs = (); } + { + if (!defined $tfidfs{$F[0]}) { + $tfidfs{$F[0]} = $F[1]; + } + } + END { + while(my ($k, $v) = each %tfidfs) { + print "$k $v\n"; + } }' > $dir/docs/source2tf_idf.scp +fi + +if [ $stage -le 9 ]; then + echo "$0: using default values of non-scored words..." + + # At the level of this script we just hard-code it that non-scored words are + # those that map to silence phones (which is what get_non_scored_words.py + # gives us), although this could easily be made user-configurable. This list + # of non-scored words affects the behavior of several of the data-cleanup + # scripts; essentially, we view the non-scored words as negotiable when it + # comes to the reference transcript, so we'll consider changing the reference + # to match the hyp when it comes to these words. + steps/cleanup/internal/get_non_scored_words.py $lang > $dir/non_scored_words.txt +fi + +if [ $stage -le 10 ]; then + sdir=$dir/query_docs/split$nj + mkdir -p $sdir + + # Compute TF-IDF for the query documents (decode hypotheses). + # The output is an archive of TF-IDF indexed by the query. 
+ $cmd JOB=1:$nj $decode_dir/ctm_$lmwt/log/compute_query_tf_idf.JOB.log \ + steps/cleanup/internal/ctm_to_text.pl --non-scored-words $dir/non_scored_words.txt \ + $decode_dir/ctm_$lmwt/ctm.JOB \| \ + steps/cleanup/internal/compute_tf_idf.py \ + --tf-weighting-scheme="normalized" \ + --idf-weighting-scheme="log" \ + --input-idf-stats=$dir/docs/idf_stats.txt \ + --accumulate-over-docs=false \ + - $sdir/query_tf_idf.JOB.ark.txt + + # The relevant documents can be found using TF-IDF similarity and nearby + # documents can also be picked for the Smith-Waterman alignment stage. + + # Get a mapping from the new utterance-ids to original transcripts + if [ -z "$utt2text" ]; then + awk '{print $1" "$2}' $dir/uniform_sub_segments > \ + $dir/new2orig_utt + else + awk '{print $1" "$2}' $dir/uniform_sub_segments | \ + utils/apply_map.pl -f 2 $utt2text > \ + $dir/new2orig_utt + fi + + # The query TF-IDFs are all indexed by the utterance-id of the sub-segments. + # The source TF-IDFs use the document-ids created by splitting the reference + # text into documents. + # For each query, we need to retrieve the documents that were created from + # the same original utterance that the sub-segment was from. For this, + # we have to load the source TF-IDF that has those documents. This + # information is provided using the option --source-text-id2tf-idf-file. + # The output of this script is a file where the first column is the + # query-id (i.e. sub-segment-id) and the remaining columns, which is at least + # one in number and a maxmium of (1 + 2 * num-neighbors-to-search) columns + # is the document-ids for the retrieved documents. 
+ $cmd JOB=1:$nj $dir/log/retrieve_similar_docs.JOB.log \ + steps/cleanup/internal/retrieve_similar_docs.py \ + --query-tfidf=$dir/query_docs/split$nj/query_tf_idf.JOB.ark.txt \ + --source-text-id2tfidf=$dir/docs/source2tf_idf.scp \ + --source-text-id2doc-ids=$dir/docs/text2doc \ + --query-id2source-text-id=$dir/new2orig_utt \ + --num-neighbors-to-search=$num_neighbors_to_search \ + --neighbor-tfidf-threshold=$neighbor_tfidf_threshold \ + --relevant-docs=$dir/query_docs/split$nj/relevant_docs.JOB.txt + + $cmd JOB=1:$nj $decode_dir/ctm_$lmwt/log/get_ctm_edits.JOB.log \ + steps/cleanup/internal/stitch_documents.py \ + --query2docs=$dir/query_docs/split$nj/relevant_docs.JOB.txt \ + --input-documents=$dir/docs/split$nj/docs.JOB.txt \ + --output-documents=- \| \ + steps/cleanup/internal/align_ctm_ref.py --eps-symbol='""' \ + --oov-word="'`cat $lang/oov.txt`'" --symbol-table=$lang/words.txt \ + --hyp-format=CTM --align-full-hyp=$align_full_hyp \ + --hyp=$decode_dir/ctm_$lmwt/ctm.JOB --ref=- \ + --output=$decode_dir/ctm_$lmwt/ctm_edits.JOB + + for n in `seq $nj`; do + cat $decode_dir/ctm_$lmwt/ctm_edits.$n + done > $decode_dir/ctm_$lmwt/ctm_edits + +fi + +if [ $stage -le 11 ]; then + $cmd $dir/log/resolve_ctm_edits.log \ + steps/cleanup/internal/resolve_ctm_edits_overlaps.py \ + ${data_uniform_seg}/segments $decode_dir/ctm_$lmwt/ctm_edits $dir/ctm_edits +fi + +if [ $stage -le 12 ]; then + echo "$0: modifying ctm-edits file to allow repetitions [for dysfluencies] and " + echo " ... to fix reference mismatches involving non-scored words. " + + $cmd $dir/log/modify_ctm_edits.log \ + steps/cleanup/internal/modify_ctm_edits.py --verbose=3 $dir/non_scored_words.txt \ + $dir/ctm_edits $dir/ctm_edits.modified + + echo " ... See $dir/log/modify_ctm_edits.log for details and stats, including" + echo " a list of commonly-repeated words." +fi + +if [ $stage -le 13 ]; then + echo "$0: applying 'taint' markers to ctm-edits file to mark silences and" + echo " ... 
non-scored words that are next to errors." + $cmd $dir/log/taint_ctm_edits.log \ + steps/cleanup/internal/taint_ctm_edits.py --remove-deletions=false \ + $dir/ctm_edits.modified $dir/ctm_edits.tainted + echo "... Stats, including global cor/ins/del/sub stats, are in $dir/log/taint_ctm_edits.log." +fi + +if [ $stage -le 14 ]; then + echo "$0: creating segmentation from ctm-edits file." + + segmentation_opts=( + --min-split-point-duration=$min_split_point_duration + --max-deleted-words-kept-when-merging=$max_deleted_words_kept_when_merging + --merging.max-wer=$max_wer + --merging.max-segment-length=$max_segment_length_for_merging + --merging.max-bad-proportion=$max_bad_proportion + --merging.max-intersegment-incorrect-words-length=$max_intersegment_incorrect_words_length + --splitting.max-segment-length=$max_segment_length_for_splitting + --splitting.hard-max-segment-length=$hard_max_segment_length + --splitting.min-silence-length=$min_silence_length_to_split_at + --splitting.min-non-scored-length=$min_non_scored_length_to_split_at + ) + + $cmd $dir/log/segment_ctm_edits.log \ + steps/cleanup/internal/segment_ctm_edits_mild.py \ + ${segmentation_opts[@]} $segmentation_extra_opts \ + --oov-symbol-file=$lang/oov.txt \ + --ctm-edits-out=$dir/ctm_edits.segmented \ + --word-stats-out=$dir/word_stats.txt \ + $dir/non_scored_words.txt \ + $dir/ctm_edits.tainted $dir/text $dir/segments + + echo "$0: contents of $dir/log/segment_ctm_edits.log are:" + cat $dir/log/segment_ctm_edits.log + echo "For word-level statistics on p(not-being-in-a-segment), with 'worst' words at the top," + echo "see $dir/word_stats.txt" + echo "For detailed utterance-level debugging information, see $dir/ctm_edits.segmented" +fi + +mkdir -p $out_data +if [ $stage -le 15 ]; then + utils/data/subsegment_data_dir.sh $data_uniform_seg \ + $dir/segments $dir/text $out_data +fi diff --git a/egs/wsj/s5/steps/compute_vad_decision.sh b/egs/wsj/s5/steps/compute_vad_decision.sh new file mode 100755 index 
00000000000..4cf3c5b2b79 --- /dev/null +++ b/egs/wsj/s5/steps/compute_vad_decision.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Compute energy based VAD output + +nj=4 +cmd=run.pl +vad_config=conf/vad.conf + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# -lt 1 ] || [ $# -gt 3 ]; then + echo "Usage: $0 [options] [ []]"; + echo "e.g.: $0 data/train exp/make_vad mfcc" + echo "Note: defaults to /log, and defaults to /data" + echo " Options:" + echo " --vad-config # config passed to compute-vad-energy" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +if [ $# -ge 2 ]; then + logdir=$2 +else + logdir=$data/log +fi +if [ $# -ge 3 ]; then + vaddir=$3 +else + vaddir=$data/data +fi + + +# make $vaddir an absolute pathname. +vaddir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $vaddir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $vaddir || exit 1; +mkdir -p $logdir || exit 1; + +if [ -f $data/vad.scp ]; then + mkdir -p $data/.backup + echo "$0: moving $data/vad.scp to $data/.backup" + mv $data/vad.scp $data/.backup +fi + +for f in $data/feats.scp "$vad_config"; do + if [ ! 
-f $f ]; then + echo "compute_vad_decision.sh: no such file $f" + exit 1; + fi +done + +utils/split_data.sh $data $nj || exit 1; +sdata=$data/split$nj; + +$cmd JOB=1:$nj $logdir/vad_${name}.JOB.log \ + compute-vad --config=$vad_config scp:$sdata/JOB/feats.scp \ + ark,scp:$vaddir/vad_${name}.JOB.ark,$vaddir/vad_${name}.JOB.scp || exit 1 + +for ((n=1; n<=nj; n++)); do + cat $vaddir/vad_${name}.$n.scp || exit 1; +done > $data/vad.scp + +nc=`cat $data/vad.scp | wc -l` +nu=`cat $data/feats.scp | wc -l` +if [ $nc -ne $nu ]; then + echo "**Warning it seems not all of the speakers got VAD output ($nc != $nu);" + echo "**validate_data_dir.sh will fail; you might want to use fix_data_dir.sh" + [ $nc -eq 0 ] && exit 1; +fi + + +echo "Created VAD output for $name" diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 71e64d9e680..f6be7a286ec 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -413,13 +413,7 @@ def CreateReverberatedCopy(input_dir, wav_scp = ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x)) if not os.path.isfile(input_dir + "/reco2dur"): print("Getting the duration of the recordings..."); - read_entire_file="false" - for value in wav_scp.values(): - # we will add more checks for sox commands which modify the header as we come across these cases in our data - if "sox" in value and "speed" in value: - read_entire_file="true" - break - data_lib.RunKaldiCommand("wav-to-duration --read-entire-file={1} scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir, read_entire_file)) + data_lib.RunKaldiCommand("utils/data/get_reco2dur.sh {}".format(input_dir)) durations = ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0])) foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':')) background_snr_array = map(lambda x: float(x), background_snr_string.split(':')) diff --git 
a/egs/wsj/s5/steps/nnet3/align.sh b/egs/wsj/s5/steps/nnet3/align.sh index cf1cc9124d3..92780f76480 100755 --- a/egs/wsj/s5/steps/nnet3/align.sh +++ b/egs/wsj/s5/steps/nnet3/align.sh @@ -24,6 +24,7 @@ extra_right_context=0 extra_left_context_initial=-1 extra_right_context_final=-1 online_ivector_dir= +graphs_scp= # End configuration options. echo "$0 $@" # Print the command line for logging @@ -97,8 +98,6 @@ fi echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" -tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; - frame_subsampling_opt= if [ -f $srcdir/frame_subsampling_factor ]; then # e.g. for 'chain' systems @@ -114,9 +113,20 @@ if [ -f $srcdir/frame_subsampling_factor ]; then fi fi +if [ ! -z "$graphs_scp" ]; then + if [ ! -f $graphs_scp ]; then + echo "Could not find graphs $graphs_scp" && exit 1 + fi + tra="scp:utils/filter_scp.pl $sdata/JOB/utt2spk $graphs_scp |" + prog=compile-train-graphs-fsts +else + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + prog=compile-train-graphs +fi $cmd $queue_opt JOB=1:$nj $dir/log/align.JOB.log \ - compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \ + $prog --read-disambig-syms=$lang/phones/disambig.int $dir/tree \ + $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \ nnet3-align-compiled $scale_opts $ivector_opts $frame_subsampling_opt \ --frames-per-chunk=$frames_per_chunk \ --extra-left-context=$extra_left_context \ diff --git a/egs/wsj/s5/steps/nnet3/chain/make_weighted_den_fst.sh b/egs/wsj/s5/steps/nnet3/chain/make_weighted_den_fst.sh index 7dade75a0ed..3b6371168ce 100755 --- a/egs/wsj/s5/steps/nnet3/chain/make_weighted_den_fst.sh +++ b/egs/wsj/s5/steps/nnet3/chain/make_weighted_den_fst.sh @@ -86,37 +86,44 @@ else fi fi -if [ $stage -le 1 ]; then - all_phones="" # will contain the names of the .gz files containing phones, - # with some members 
possibly repeated per the --num-repeats - # option - for n in `seq 0 $[num_alignments-1]`; do - this_num_repeats=${num_repeats_array[$n]} - this_alignment_dir=${ali_dirs[$n]} - num_jobs=$(cat $this_alignment_dir/num_jobs) - if ! [ "$this_num_repeats" -gt 0 ]; then - echo "Expected comma-separated list of integers for --num-repeats option, got '$num_repeats'" - exit 1 - fi +all_phones="" # will contain the names of the .gz files containing phones, + # with some members possibly repeated per the --num-repeats + # option +for n in `seq 0 $[num_alignments-1]`; do + this_num_repeats=${num_repeats_array[$n]} + this_alignment_dir=${ali_dirs[$n]} + num_jobs=$(cat $this_alignment_dir/num_jobs) + if ! [ "$this_num_repeats" -ge 0 ]; then + echo "Expected comma-separated list of integers for --num-repeats option, got '$num_repeats'" + exit 1 + fi + if [ $stage -le 1 ]; then for j in $(seq $num_jobs); do gunzip -c $this_alignment_dir/ali.$j.gz; done | \ ali-to-phones $this_alignment_dir/final.mdl ark:- "ark:|gzip -c >$dir/phones.$n.gz" || exit 1; + fi - all_phones="$all_phones $(for r in $(seq $this_num_repeats); do echo $dir/phones.$n.gz; done)" - done + if [ ! 
-s $dir/phones.$n.gz ]; then + echo "$dir/phones.$n.gz is empty or does not exist" + exit 1 + fi + all_phones="$all_phones $(for r in $(seq $this_num_repeats); do echo $dir/phones.$n.gz; done)" +done + +if [ $stage -le 2 ]; then $cmd $dir/log/make_phone_lm_fst.log \ gunzip -c $all_phones \| \ chain-est-phone-lm $lm_opts ark:- $dir/phone_lm.fst || exit 1; rm $dir/phones.*.gz fi -if [ $stage -le 2 ]; then +if [ $stage -le 3 ]; then copy-transition-model ${ali_dirs[0]}/final.mdl $dir/0.trans_mdl || exit 1; fi -if [ $stage -le 3 ]; then +if [ $stage -le 4 ]; then $cmd $dir/log/make_den_fst.log \ chain-make-den-fst $dir/tree $dir/0.trans_mdl \ $dir/phone_lm.fst \ diff --git a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh index 0a5eb340a34..ddbc1a74266 100755 --- a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh +++ b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh @@ -42,6 +42,7 @@ max_count=0 # The use of this option (e.g. --max-count 100) can make # posterior-scaling, so assuming the posterior-scale is 0.1, # --max-count 100 starts having effect after 1000 frames, or # 10 seconds of data. +use_vad=false # End configuration section. @@ -69,8 +70,13 @@ data=$1 srcdir=$2 dir=$3 +extra_files= +if $use_vad; then + extra_files=$data/vad.scp +fi + for f in $data/feats.scp $srcdir/final.ie $srcdir/final.dubm $srcdir/global_cmvn.stats $srcdir/splice_opts \ - $srcdir/online_cmvn.conf $srcdir/final.mat; do + $srcdir/online_cmvn.conf $srcdir/final.mat $extra_files; do [ ! 
-f $f ] && echo "$0: No such file $f" && exit 1; done @@ -117,9 +123,15 @@ done if [ $stage -le 0 ]; then echo "$0: extracting iVectors" + extra_opts= + if $use_vad; then + extra_opts="--frame-weights-rspecifier=scp:$data/vad.scp" + fi + $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ - ivector-extract-online2 --config=$ieconf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/feats.scp ark:- \| \ - copy-feats --compress=$compress ark:- \ + ivector-extract-online2 --config=$ieconf $extra_opts \ + ark:$sdata/JOB/spk2utt scp:$sdata/JOB/feats.scp ark:- \| \ + copy-feats --compress=$compress ark:- \ ark,scp:$absdir/ivector_online.JOB.ark,$absdir/ivector_online.JOB.scp || exit 1; fi diff --git a/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh b/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh index 60e3df20df2..f71a14aebf1 100755 --- a/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh +++ b/egs/wsj/s5/steps/segmentation/detect_speech_activity.sh @@ -99,14 +99,14 @@ data_id=`basename $data_dir` sad_dir=${dir}/${sad_name}${affix}_${data_id}_whole${feat_affix} seg_dir=${dir}/${segmentation_name}${affix}_${data_id}_whole${feat_affix} -test_data_dir=data/${data_id}${feat_affix}_hires - if $convert_data_dir_to_whole; then + test_data_dir=data/${data_id}_whole${feat_affix}_hires if [ $stage -le 0 ]; then rm -r ${test_data_dir} || true utils/data/convert_data_dir_to_whole.sh $src_data_dir ${test_data_dir} fi else + test_data_dir=data/${data_id}${feat_affix}_hires if [ $stage -le 0 ]; then rm -r ${test_data_dir} || true utils/copy_data_dir.sh $src_data_dir $test_data_dir @@ -170,7 +170,8 @@ fi ## Prepare FST we search to make speech/silence decisions. 
############################################################################### -frame_shift=$(utils/data/get_frame_shift.sh $test_data_dir) +utils/data/get_utt2dur.sh --nj $nj --cmd "$cmd" $test_data_dir || exit 1 +frame_shift=$(utils/data/get_frame_shift.sh $test_data_dir) || exit 1 graph_dir=${dir}/graph_${output_name} if [ $stage -le 5 ]; then diff --git a/src/online2bin/ivector-extract-online2.cc b/src/online2bin/ivector-extract-online2.cc index 4d71c2923ab..33aa990d1c3 100644 --- a/src/online2bin/ivector-extract-online2.cc +++ b/src/online2bin/ivector-extract-online2.cc @@ -55,6 +55,8 @@ int main(int argc, char *argv[]) { g_num_threads = 8; bool repeat = false; + int32 length_tolerance = 0; + std::string frame_weights_rspecifier; po.Register("num-threads", &g_num_threads, "Number of threads to use for computing derived variables " @@ -62,6 +64,12 @@ int main(int argc, char *argv[]) { po.Register("repeat", &repeat, "If true, output the same number of iVectors as input frames " "(including repeated data)."); + po.Register("frame-weights-rspecifier", &frame_weights_rspecifier, + "Archive of frame weights to scale stats"); + po.Register("length-tolerance", &length_tolerance, + "Tolerance on the difference in number of frames " + "for feats and frame weights"); + po.Read(argc, argv); if (po.NumArgs() != 3) { @@ -82,6 +90,7 @@ int main(int argc, char *argv[]) { SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); + RandomAccessBaseFloatVectorReader frame_weights_reader(frame_weights_rspecifier); BaseFloatMatrixWriter ivector_writer(ivectors_wspecifier); @@ -106,6 +115,31 @@ int main(int argc, char *argv[]) { ivector_feature.SetAdaptationState(adaptation_state); + if (!frame_weights_rspecifier.empty()) { + if (!frame_weights_reader.HasKey(utt)) { + KALDI_WARN << "Did not find weights for utterance " << utt; + num_err++; + continue; + } + const Vector &weights = 
frame_weights_reader.Value(utt); + + if (std::abs(weights.Dim() - feats.NumRows()) > length_tolerance) { + num_err++; + continue; + } + + std::vector > frame_weights; + for (int32 i = 0; i < feats.NumRows(); i++) { + if (i < weights.Dim()) + frame_weights.push_back(std::make_pair(i, weights(i))); + else + frame_weights.push_back(std::make_pair(i, 0.0)); + } + + + ivector_feature.UpdateFrameWeights(frame_weights); + } + int32 T = feats.NumRows(), n = (repeat ? 1 : ivector_config.ivector_period), num_ivectors = (T + n - 1) / n;