diff --git a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh
index d45095ec85b..e0b9af96b8c 100755
--- a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh
+++ b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh
@@ -17,10 +17,12 @@ if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi
 
 if $speed_perturb; then
   if [ $stage -le 1 ]; then
-    # Although the nnet will be trained by high resolution data, we still have to perturb the normal data to get the alignments
-    # _sp stands for speed-perturbed
+    # Although the nnet will be trained by high resolution data, we still have
+    # to perturb the normal data to get the alignments.  _sp stands for
+    # speed-perturbed.
     echo "$0: preparing directory for speed-perturbed data"
-    utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+    utils/data/perturb_data_dir_speed_3way.sh --always-include-prefix true \
+      data/${train_set} data/${train_set}_sp
 
     echo "$0: creating MFCC features for low-resolution speed-perturbed data"
     mfccdir=mfcc_perturbed
diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh b/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh
index 048220d62fd..f857ae2bdd7 100755
--- a/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh
+++ b/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh
@@ -1,20 +1,36 @@
 #!/bin/bash
 
-# Copyright 2016 Johns Hopkins University (author: Daniel Povey)
+# Copyright 2016-2018 Johns Hopkins University (author: Daniel Povey)
+#                2018 Hossein Hadian
 
 # Apache 2.0
 
 # This script does the standard 3-way speed perturbing of
 # a data directory (it operates on the wav.scp).
 
+# If you add the option "--always-include-prefix true", it will include the
+# prefix "sp1.0-" for the original un-perturbed data. This can help resolve
+# problems with sorting.
+# We don't make '--always-include-prefix true' the default behavior because
+# it can break some older scripts that relied on the original utterance-ids
+# being a subset of the perturbed data's utterance-ids.
+
+always_include_prefix=false
+
 . utils/parse_options.sh
 
 if [ $# != 2 ]; then
   echo "Usage: perturb_data_dir_speed_3way.sh <srcdir> <destdir>"
   echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1."
   echo "e.g.:"
-  echo " $0 data/train data/train_sp"
+  echo " $0 [options] data/train data/train_sp"
   echo "Note: if <destdir>/feats.scp already exists, this will refuse to run."
+  echo "Options:"
+  echo "    --always-include-prefix [true|false]   # default: false. If set to true,"
+  echo "                                           # it will add the prefix 'sp1.0-' to"
+  echo "                                           # utterance and speaker-ids for data at"
+  echo "                                           # the original speed. Can resolve"
+  echo "                                           # issues RE data sorting."
   exit 1
 fi
 
@@ -39,16 +55,25 @@ utils/data/get_utt2dur.sh ${srcdir}
 
 utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1
 utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1
-utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- ${srcdir} ${destdir}_speed1.0
-if [ ! -f $srcdir/utt2uniq ]; then
-  cat $srcdir/utt2spk | awk '{printf("sp1.0-%s %s\n", $1, $1);}' > ${destdir}_speed1.0/utt2uniq
+if $always_include_prefix; then
+  utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- ${srcdir} ${destdir}_speed1.0
+  if [ ! -f $srcdir/utt2uniq ]; then
+    cat $srcdir/utt2spk | awk '{printf("sp1.0-%s %s\n", $1, $1);}' > ${destdir}_speed1.0/utt2uniq
+  else
+    cat $srcdir/utt2uniq | awk '{printf("sp1.0-%s %s\n", $1, $2);}' > ${destdir}_speed1.0/utt2uniq
+  fi
+  utils/data/combine_data.sh $destdir ${destdir}_speed1.0 ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
+
+  rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 ${destdir}_speed1.0
 else
-  cat $srcdir/utt2uniq | awk '{printf("sp1.0-%s %s\n", $1, $2);}' > ${destdir}_speed1.0/utt2uniq
+  utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
+  rm -r ${destdir}_speed0.9 ${destdir}_speed1.1
 fi
 
-utils/data/combine_data.sh $destdir ${destdir}_speed1.0 ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
-
-rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 ${destdir}_speed1.0
-
 echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"
-utils/validate_data_dir.sh --no-feats --no-text $destdir
+if ! utils/validate_data_dir.sh --no-feats --no-text $destdir; then
+  echo "$0: Validation failed. If it is a sorting issue, try the option '--always-include-prefix true'."
+  exit 1
+fi
+
+exit 0