diff --git a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
index ea863cb672b..7ce4d553733 100755
--- a/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
+++ b/egs/swbd/s5c/local/xvector/prepare_perturbed_data.sh
@@ -21,11 +21,11 @@ if [ $stage -le 1 ]; then
   if [ -d data/${datadir}_sp ]; then
     echo "$0: directory ${datadir}_sp already exists, skipping creating it."
   else
-    utils/data/perturb_data_dir_speed_3way.sh ${datadir} ${datadir}_sp
-    utils/data/perturb_data_dir_volume.sh ${datadir}_sp
+    utils/data/perturb_data_dir_speed_3way.sh data/${datadir} data/${datadir}_sp
+    utils/data/perturb_data_dir_volume.sh data/${datadir}_sp
   fi
   if [ -f data/${datadir}_sp_hires/feats.scp ]; then
-    echo "$0: directory ${datadir}_sp_hires/feats.scp already exists, skipping creating it."
+    echo "$0: file data/${datadir}_sp_hires/feats.scp already exists, skipping creating it."
   else
     mfccdir=mfcc
     utils/copy_data_dir.sh data/${datadir}_sp data/${datadir}_sp_hires
+ "If false, the chunk length varies from min-frames-per-chunk to max-frames-per-chunk" + "according to a geometric sequence.", + default="true", choices = ["false", "true"]) parser.add_argument("--frames-per-iter", type=int, default=1000000, help="Target number of frames for each archive") parser.add_argument("--num-archives", type=int, default=-1, @@ -137,6 +145,18 @@ def RandomChunkLength(): ans = int(math.exp(log_value) + 0.45) return ans +# This function returns an integer in the range +# [min-frames-per-chunk, max-frames-per-chunk] according to a geometric +# sequence. For example, suppose min-frames-per-chunk is 50, +# max-frames-per-chunk is 200, and args.num_archives is 3. Then the +# lengths for archives 0, 1, and 2 will be 50, 100, and 200. +def DeterministicChunkLength(archive_id): + ans = int(math.pow(float(args.max_frames_per_chunk) / + args.min_frames_per_chunk, float(archive_id) / + (args.num_archives-1)) * args.min_frames_per_chunk + 0.5) + return ans + + # given an utterance length utt_length (in frames) and two desired chunk lengths # (length1 and length2) whose sum is <= utt_length, @@ -180,14 +200,21 @@ def GetRandomOffsets(utt_length, length1, length2): # an array of 3-tuples (utterance-index, offset1, offset2) all_egs= [] -info_f = open(args.egs_dir + "/temp/archive_chunk_lengths", "w") -if info_f is None: - sys.exit("Error opening file {0}/temp/archive_chunk_lengths".format(args.egs_dir)); +prefix = "" +if args.prefix != "": + prefix = args.prefix + "_" +info_f = open(args.egs_dir + "/temp/" + prefix + "archive_chunk_lengths", "w") +if info_f is None: + sys.exit(str("Error opening file {0}/temp/" + prefix + "archive_chunk_lengths").format(args.egs_dir)); for archive_index in range(args.num_archives): print("Processing archive {0}".format(archive_index + 1)) - length1 = RandomChunkLength(); - length2 = RandomChunkLength(); + if args.randomize_chunk_length == "true": + length1 = RandomChunkLength(); + length2 = length1 + else: + length1 = DeterministicChunkLength(archive_index); + length2 = length1 print("{0} {1} {2}".format(archive_index + 1, length1, length2), file=info_f) archive_chunk_lengths.append( (length1, length2) ) tot_length = length1 + length2 @@ -218,12 +245,13 @@ def GetRandomOffsets(utt_length, length1, length2): for (utterance_index, offset1, offset2) in all_egs[cur_archive]: this_ranges.append( (utterance_index, i, offset1, offset2) ) cur_archive = cur_archive + 1 - f = open(args.egs_dir + "/temp/ranges." + str(job + 1), "w") + f = open(args.egs_dir + "/temp/" + prefix + "ranges." + str(job + 1), "w") if f is None: - sys.exit("Error opening file " + args.egs_dir + "/temp/ranges." + str(job + 1)) + sys.exit("Error opening file " + args.egs_dir + "/temp/" + prefix + "ranges." + str(job + 1)) for (utterance_index, i, offset1, offset2) in sorted(this_ranges): archive_index = this_archives_for_job[i] - print("{0} {1} {2} {3} {4}".format(utt_ids[utterance_index], + print("{0} {1} {2} {3} {4} {5} {6}".format(utt_ids[utterance_index], + i, archive_index + 1, offset1, archive_chunk_lengths[archive_index][0], @@ -232,13 +260,13 @@ def GetRandomOffsets(utt_length, length1, length2): file=f) f.close() - f = open(args.egs_dir + "/temp/outputs." + str(job + 1), "w") + f = open(args.egs_dir + "/temp/" + prefix + "outputs." + str(job + 1), "w") if f is None: - sys.exit("Error opening file " + args.egs_dir + "/temp/outputs." 
diff --git a/egs/wsj/s5/steps/nnet3/xvector/get_egs.sh b/egs/wsj/s5/steps/nnet3/xvector/get_egs.sh
index 4b0d558bc09..2ab81395d47 100755
--- a/egs/wsj/s5/steps/nnet3/xvector/get_egs.sh
+++ b/egs/wsj/s5/steps/nnet3/xvector/get_egs.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 
-# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+# Copyright 2012-2016  Johns Hopkins University (Author: Daniel Povey)
+#           2016       David Snyder
+# Apache 2.0
 #
 # This script dumps training examples (egs) for xvector training.  These egs
 # have only an input and no outputs (the inputs are typically MFCCs).  The egs
@@ -15,12 +17,6 @@
 # This script, which will generally be called from other neural-net training
 # scripts, extracts the training examples used to train the neural net (and also
 # the validation examples used for diagnostics), and puts them in separate archives.
-#
-# This script dumps egs with several frames of labels, controlled by the
-# frames_per_eg config variable (default: 8).  This takes many times less disk
-# space because typically we have 4 to 7 frames of context on the left and
-# right, and this ends up getting shared.  This is at the expense of slightly
-# higher disk I/O while training.
 
 # Begin configuration section.
 
@@ -94,6 +90,9 @@ if [ ! -f $data/feats.scp ]; then
   exit 1
 fi
 
+sdata=$data/split$nj
+utils/split_data.sh $data $nj
+
 if [ ! -f $data/utt2dur ]; then
   # getting this utt2dur will normally be more lightweight than
   # getting the exact utterance-to-length map.
@@ -120,28 +119,42 @@ if [ $stage -le 1 ]; then
   echo "$0: getting list of validation utterances"
 
   # Get list of validation utterances.
-  awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \
+  awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_heldout_utts \
     > $temp/valid_uttlist || exit 1;
 
+  awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $temp/valid_uttlist \
+    | utils/shuffle_list.pl | head -$num_heldout_utts > $temp/train_subset_uttlist || exit 1;
+
   if [ -f $data/utt2uniq ]; then  # this matters if you use data augmentation.
-    echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
-    echo "include all perturbed versions of the same 'real' utterances."
-    mv $temp/valid_uttlist $temp/valid_uttlist.tmp
     utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $temp/uniq2utt
-    cat $temp/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
-      sort | uniq | utils/apply_map.pl $temp/uniq2utt | \
-      awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $temp/valid_uttlist
-    rm $temp/uniq2utt $temp/valid_uttlist.tmp
+    for uttlist in valid_uttlist train_subset_uttlist; do
+      echo "File $data/utt2uniq exists, so augmenting $uttlist to"
+      echo "include all perturbed versions of the same 'real' utterances."
+      mv $temp/$uttlist $temp/${uttlist}.tmp
+      cat $temp/${uttlist}.tmp | utils/apply_map.pl $data/utt2uniq | \
+        sort | uniq | utils/apply_map.pl $temp/uniq2utt | \
+        awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $temp/$uttlist
+      rm $temp/${uttlist}.tmp
+    done
+    rm $temp/uniq2utt
   fi
 
+  utils/filter_scp.pl --exclude $temp/valid_uttlist <$temp/utt2len > $temp/utt2len.train
   utils/filter_scp.pl $temp/valid_uttlist <$temp/utt2len > $temp/utt2len.valid
+  utils/filter_scp.pl $temp/train_subset_uttlist <$temp/utt2len > $temp/utt2len.train_subset
 fi
 
+# TODO: Currently just supporting raw features
+feats="scp,s,cs:utils/filter_scp.pl $temp/ranges.JOB $data/feats.scp |"
+valid_feats="scp,s,cs:utils/filter_scp.pl $temp/valid_uttlist $data/feats.scp |"
+train_subset_feats="scp,s,cs:utils/filter_scp.pl $temp/train_subset_uttlist $data/feats.scp |"
+
 # first for the training data... work out how many archives.
 
 num_train_frames=$(awk '{n += $2} END{print n}' <$temp/utt2len.train)
 num_valid_frames=$(awk '{n += $2} END{print n}' <$temp/utt2len.valid)
+num_train_subset_frames=$(awk '{n += $2} END{print n}' <$temp/utt2len.train_subset)
 
 echo $num_train_frames >$dir/info/num_frames
@@ -166,147 +179,71 @@
 fi
 
 if [ $stage -le 3 ]; then
-  echo "$0: allocating examples"
-  $cmd $dir/log/allocate_examples.log \
+  echo "$0: allocating training examples"
+  $cmd $dir/log/allocate_examples_train.log \
     steps/nnet3/xvector/allocate_examples.py \
       --min-frames-per-chunk=$min_frames_per_chunk \
       --max-frames-per-chunk=$max_frames_per_chunk \
      --frames-per-iter=$frames_per_iter \
      --num-archives=$num_train_archives --num-jobs=$nj \
      $dir/temp/utt2len.train $dir || exit 1
-fi
-
-# HERE - todo.
-
-exit 0
-
+
+  echo "$0: allocating training subset examples"
+  $cmd $dir/log/allocate_examples_train_subset.log \
+    steps/nnet3/xvector/allocate_examples.py \
+      --prefix train_subset \
+      --min-frames-per-chunk=$min_frames_per_chunk \
+      --max-frames-per-chunk=$max_frames_per_chunk \
+      --randomize-chunk-length false \
+      --frames-per-iter=$frames_per_iter_diagnostic \
+      --num-archives=$num_diagnostic_archives --num-jobs=1 \
+      $dir/temp/utt2len.train_subset $dir || exit 1
 
-if [ $stage -le 2 ]; then
-  echo "$0: copying data alignments"
-  for id in $(seq $num_ali_jobs); do gunzip -c $alidir/ali.$id.gz; done | \
-    copy-int-vector ark:- ark,scp:$dir/ali.ark,$dir/ali.scp || exit 1;
-fi
-
-egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress"
-
-[ -z $valid_left_context ] && valid_left_context=$left_context;
-[ -z $valid_right_context ] && valid_right_context=$right_context;
-valid_egs_opts="--left-context=$valid_left_context --right-context=$valid_right_context --compress=$compress"
-
-echo $left_context > $dir/info/left_context
-echo $right_context > $dir/info/right_context
-num_pdfs=$(tree-info --print-args=false $alidir/tree | grep num-pdfs | awk '{print $2}')
-if [ $stage -le 3 ]; then
-  echo "$0: Getting validation and training subset examples."
-  rm $dir/.error 2>/dev/null
-  echo "$0: ... extracting validation and training-subset alignments."
-
-  utils/filter_scp.pl <(cat $dir/valid_uttlist $dir/train_subset_uttlist) \
-    <$dir/ali.scp >$dir/ali_special.scp
-
-  $cmd $dir/log/create_valid_subset.log \
-    nnet3-get-egs --num-pdfs=$num_pdfs $valid_ivector_opt $valid_egs_opts "$valid_feats" \
-    "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \
-    "ark:$dir/valid_all.egs" || touch $dir/.error &
-  $cmd $dir/log/create_train_subset.log \
-    nnet3-get-egs --num-pdfs=$num_pdfs $train_subset_ivector_opt $valid_egs_opts "$train_subset_feats" \
-    "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \
-    "ark:$dir/train_subset_all.egs" || touch $dir/.error &
-  wait;
-  [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1
-  echo "... Getting subsets of validation examples for diagnostics and combination."
-  $cmd $dir/log/create_valid_subset_combine.log \
-    nnet3-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \
-    ark:$dir/valid_combine.egs || touch $dir/.error &
-  $cmd $dir/log/create_valid_subset_diagnostic.log \
-    nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \
-    ark:$dir/valid_diagnostic.egs || touch $dir/.error &
-
-  $cmd $dir/log/create_train_subset_combine.log \
-    nnet3-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \
-    ark:$dir/train_combine.egs || touch $dir/.error &
-  $cmd $dir/log/create_train_subset_diagnostic.log \
-    nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \
-    ark:$dir/train_diagnostic.egs || touch $dir/.error &
-  wait
-  sleep 5  # wait for file system to sync.
-  cat $dir/valid_combine.egs $dir/train_combine.egs > $dir/combine.egs
-
-  for f in $dir/{combine,train_diagnostic,valid_diagnostic}.egs; do
-    [ ! -s $f ] && echo "No examples in file $f" && exit 1;
-  done
-  rm $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_combine.egs
+
+  echo "$0: allocating validation examples"
+  $cmd $dir/log/allocate_examples_valid.log \
+    steps/nnet3/xvector/allocate_examples.py \
+      --prefix valid \
+      --min-frames-per-chunk=$min_frames_per_chunk \
+      --max-frames-per-chunk=$max_frames_per_chunk \
+      --randomize-chunk-length false \
+      --frames-per-iter=$frames_per_iter_diagnostic \
+      --num-archives=$num_diagnostic_archives --num-jobs=1 \
+      $dir/temp/utt2len.valid $dir || exit 1
 fi
 
 if [ $stage -le 4 ]; then
-  # create egs_orig.*.*.ark; the first index goes to $nj,
-  # the second to $num_archives_intermediate.
-
-  egs_list=
-  for n in $(seq $num_archives_intermediate); do
-    egs_list="$egs_list ark:$dir/egs_orig.JOB.$n.ark"
-  done
   echo "$0: Generating training examples on disk"
-  # The examples will go round-robin to egs_list.
-  $cmd JOB=1:$nj $dir/log/get_egs.JOB.log \
-    nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opt $egs_opts --num-frames=$frames_per_eg "$feats" \
-    "ark,s,cs:filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp | ali-to-pdf $alidir/final.mdl scp:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
-    nnet3-copy-egs --random=true --srand=JOB ark:- $egs_list || exit 1;
+  rm -f $dir/.error
+  for g in $(seq $nj); do
+    outputs=$(awk '{for(i=1;i<=NF;i++)printf("ark:%s ",$i);}' $temp/outputs.$g)
+    $cmd $dir/log/train_create_examples.$g.log \
+      nnet3-xvector-get-egs $temp/ranges.$g \
+      "$(echo $feats | sed s/JOB/$g/g)" $outputs || touch $dir/.error &
+  done
+  wait
+  [ -f $dir/.error ] && echo "$0: error detected while creating training egs" && exit 1
+
+  train_subset_outputs=$(awk '{for(i=1;i<=NF;i++)printf("ark:%s ",$i);}' $temp/train_subset_outputs.1)
+  echo "$0: Generating training subset examples on disk"
+  $cmd $dir/log/train_subset_create_examples.1.log \
+    nnet3-xvector-get-egs $temp/train_subset_ranges.1 \
+    "$train_subset_feats" $train_subset_outputs || exit 1
+
+  valid_outputs=$(awk '{for(i=1;i<=NF;i++)printf("ark:%s ",$i);}' $temp/valid_outputs.1)
+  echo "$0: Generating validation examples on disk"
+  $cmd $dir/log/valid_create_examples.1.log \
+    nnet3-xvector-get-egs $temp/valid_ranges.1 \
+    "$valid_feats" $valid_outputs || exit 1
 fi
 
 if [ $stage -le 5 ]; then
-  echo "$0: recombining and shuffling order of archives on disk"
-  # combine all the "egs_orig.*.JOB.scp" (over the $nj splits of the data) and
-  # shuffle the order, writing to the egs.JOB.ark
-
-  # the input is a concatenation over the input jobs.
-  egs_list=
-  for n in $(seq $nj); do
-    egs_list="$egs_list $dir/egs_orig.$n.JOB.ark"
-  done
-
-  if [ $archives_multiple == 1 ]; then # normal case.
-    $cmd --max-jobs-run $nj JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \
-      nnet3-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1;
-  else
-    # we need to shuffle the 'intermediate archives' and then split into the
-    # final archives.  we create soft links to manage this splitting, because
-    # otherwise managing the output names is quite difficult (and we don't want
-    # to submit separate queue jobs for each intermediate archive, because then
-    # the --max-jobs-run option is hard to enforce).
-    output_archives="$(for y in $(seq $archives_multiple); do echo ark:$dir/egs.JOB.$y.ark; done)"
-    for x in $(seq $num_archives_intermediate); do
-      for y in $(seq $archives_multiple); do
-        archive_index=$[($x-1)*$archives_multiple+$y]
-        # egs.intermediate_archive.{1,2,...}.ark will point to egs.archive.ark
-        ln -sf egs.$archive_index.ark $dir/egs.$x.$y.ark || exit 1
-      done
    -done
-    $cmd --max-jobs-run $nj JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \
-      nnet3-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:- \| \
-      nnet3-copy-egs ark:- $output_archives || exit 1;
-  fi
-
+  echo "$0: Shuffling order of archives on disk"
+  $cmd --max-jobs-run $nj JOB=1:$num_train_archives $dir/log/shuffle.JOB.log \
+    nnet3-shuffle-egs --srand=JOB ark:$dir/egs_temp.JOB.ark ark:$dir/egs.JOB.ark || exit 1;
+
+  $cmd --max-jobs-run $nj JOB=1:$num_diagnostic_archives $dir/log/train_subset_shuffle.JOB.log \
+    nnet3-shuffle-egs --srand=JOB ark:$dir/train_subset_egs_temp.JOB.ark ark:$dir/train_diagnostic_egs.JOB.ark || exit 1;
+
+  $cmd --max-jobs-run $nj JOB=1:$num_diagnostic_archives $dir/log/valid_shuffle.JOB.log \
+    nnet3-shuffle-egs --srand=JOB ark:$dir/valid_egs_temp.JOB.ark ark:$dir/valid_diagnostic_egs.JOB.ark || exit 1;
 fi
 
-if [ $stage -le 6 ]; then
-  echo "$0: removing temporary archives"
-  for x in $(seq $nj); do
-    for y in $(seq $num_archives_intermediate); do
-      file=$dir/egs_orig.$x.$y.ark
-      [ -L $file ] && rm $(readlink -f $file)
-      rm $file
-    done
-  done
-  if [ $archives_multiple -gt 1 ]; then
-    # there are some extra soft links that we should delete.
-    for f in $dir/egs.*.*.ark; do rm $f; done
-  fi
-  echo "$0: removing temporary alignments and transforms"
-  # Ignore errors below because trans.* might not exist.
-  rm $dir/{ali,trans}.{ark,scp} 2>/dev/null
-fi
+# TODO: Probably need to clean up the temp egs.
 
 echo "$0: Finished preparing training examples"
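
For reference, the outputs.N files consumed in stage 4 each hold one whitespace-separated list of archive paths, which the awk one-liner turns into "ark:" wspecifiers for nnet3-xvector-get-egs. A Python rendering of that expansion (a hypothetical helper, shown only to document the outputs.N format) would be:

def expand_outputs_line(outputs_path):
    # outputs.N holds one line of archive paths, e.g.
    # "exp/xvector_a/egs/egs_temp.1.ark exp/xvector_a/egs/egs_temp.2.ark";
    # each path is passed to nnet3-xvector-get-egs as "ark:<path>".
    with open(outputs_path) as f:
        return ["ark:" + p for p in f.read().split()]
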
- output_archives="$(for y in $(seq $archives_multiple); do echo ark:$dir/egs.JOB.$y.ark; done)" - for x in $(seq $num_archives_intermediate); do - for y in $(seq $archives_multiple); do - archive_index=$[($x-1)*$archives_multiple+$y] - # egs.intermediate_archive.{1,2,...}.ark will point to egs.archive.ark - ln -sf egs.$archive_index.ark $dir/egs.$x.$y.ark || exit 1 - done - done - $cmd --max-jobs-run $nj JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \ - nnet3-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:- \| \ - nnet3-copy-egs ark:- $output_archives || exit 1; - fi - + echo "$0: Shuffling order of archives on disk" + $cmd --max-jobs-run $nj JOB=1:$num_train_archives $dir/log/shuffle.JOB.log \ + nnet3-shuffle-egs --srand=JOB ark:$dir/egs_temp.JOB.ark ark:$dir/egs.JOB.ark || exit 1; + + $cmd --max-jobs-run $nj JOB=1:$num_diagnostic_archives $dir/log/train_subset_shuffle.JOB.log \ + nnet3-shuffle-egs --srand=JOB ark:$dir/train_subset_egs_temp.JOB.ark ark:$dir/train_diagnostic_egs.JOB.ark || exit 1; + $cmd --max-jobs-run $nj JOB=1:$num_diagnostic_archives $dir/log/valid_shuffle.JOB.log \ + nnet3-shuffle-egs --srand=JOB ark:$dir/valid_egs_temp.JOB.ark ark:$dir/valid_diagnostic_egs.JOB.ark || exit 1; fi -if [ $stage -le 6 ]; then - echo "$0: removing temporary archives" - for x in $(seq $nj); do - for y in $(seq $num_archives_intermediate); do - file=$dir/egs_orig.$x.$y.ark - [ -L $file ] && rm $(readlink -f $file) - rm $file - done - done - if [ $archives_multiple -gt 1 ]; then - # there are some extra soft links that we should delete. - for f in $dir/egs.*.*.ark; do rm $f; done - fi - echo "$0: removing temporary alignments and transforms" - # Ignore errors below because trans.* might not exist. - rm $dir/{ali,trans}.{ark,scp} 2>/dev/null -fi +#TODO: Probably need to cleanup the temp egs. echo "$0: Finished preparing training examples" diff --git a/src/nnet3bin/nnet3-xvector-get-egs.cc b/src/nnet3bin/nnet3-xvector-get-egs.cc index 24e50560b54..55ba475c0fe 100644 --- a/src/nnet3bin/nnet3-xvector-get-egs.cc +++ b/src/nnet3bin/nnet3-xvector-get-egs.cc @@ -49,15 +49,15 @@ static void ProcessRangeFile(const std::string &range_rxfilename, ChunkPairInfo *pair = new ChunkPairInfo(); std::vector fields; SplitStringToVector(line, " \t\n\r", true, &fields); - if (fields.size() != 6) - KALDI_ERR << "Expected 6 fields in line of range file, got " + if (fields.size() != 7) + KALDI_ERR << "Expected 7 fields in line of range file, got " << fields.size() << " instead."; std::string utt = fields[0], - start_frame1_str = fields[2], - num_frames1_str = fields[3], - start_frame2_str = fields[4], - num_frames2_str = fields[5]; + start_frame1_str = fields[3], + num_frames1_str = fields[4], + start_frame2_str = fields[5], + num_frames2_str = fields[6]; if (!ConvertStringToInteger(fields[1], &(pair->output_archive_id)) || !ConvertStringToInteger(start_frame1_str, &(pair->start_frame1)) @@ -166,11 +166,12 @@ int main(int argc, char *argv[]) { "the same utterance. The location and length of the feature chunks\n" "are specified in the 'ranges' file. Each line is interpreted as\n" "follows:\n" - " " - " \n" + " " + " " + " \n" "For example:\n" - " utt1 3 0 65 112 110\n" - " utt1 0 160 50 214 180\n" + " utt1 3 13 0 65 112 110\n" + " utt1 0 10 160 50 214 180\n" " utt2 ...\n" "\n" "Usage: nnet3-xvector-get-egs [options] "