Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions egs/swbd/s5c/local/nnet3/run_ivector_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi

if $speed_perturb; then
if [ $stage -le 1 ]; then
# Although the nnet will be trained by high resolution data, we still have to perturb the normal data to get the alignments
# _sp stands for speed-perturbed
# Although the nnet will be trained on high-resolution data, we still have
# to perturb the normal (low-resolution) data to get the alignments.
# "_sp" stands for speed-perturbed.
echo "$0: preparing directory for speed-perturbed data"
utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
utils/data/perturb_data_dir_speed_3way.sh --always-include-prefix true \
data/${train_set} data/${train_set}_sp

echo "$0: creating MFCC features for low-resolution speed-perturbed data"
mfccdir=mfcc_perturbed
Expand Down
47 changes: 36 additions & 11 deletions egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,36 @@
#!/bin/bash

# Copyright 2016 Johns Hopkins University (author: Daniel Povey)
# Copyright 2016-2018 Johns Hopkins University (author: Daniel Povey)
# 2018 Hossein Hadian

# Apache 2.0

# This script does the standard 3-way speed perturbing of
# a data directory (it operates on the wav.scp).

# If you add the option "--always-include-prefix true", it will also add the
# prefix "sp1.0-" to the utterance-ids and speaker-ids of the original,
# un-perturbed data. This can help resolve problems with sorting.
# We don't make '--always-include-prefix true' the default behavior because
# it can break some older scripts that relied on the original utterance-ids
# being a subset of the perturbed data's utterance-ids.

always_include_prefix=false

. utils/parse_options.sh

if [ $# != 2 ]; then
echo "Usage: perturb_data_dir_speed_3way.sh <srcdir> <destdir>"
echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1."
echo "e.g.:"
echo " $0 data/train data/train_sp"
echo " $0 [options] data/train data/train_sp"
echo "Note: if <destdir>/feats.scp already exists, this will refuse to run."
echo "Options:"
echo " --always-include-prefix [true|false] # default: false. If set to true,"
echo " # it will add the prefix 'sp1.0-' to"
echo " # utterance and speaker-ids for data at"
echo " # the original speed. Can resolve"
echo " # issues RE data sorting."
exit 1
fi

Expand All @@ -39,16 +55,25 @@ utils/data/get_utt2dur.sh ${srcdir}
utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1
utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1

utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- ${srcdir} ${destdir}_speed1.0
if [ ! -f $srcdir/utt2uniq ]; then
cat $srcdir/utt2spk | awk '{printf("sp1.0-%s %s\n", $1, $1);}' > ${destdir}_speed1.0/utt2uniq
if $always_include_prefix; then
utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- ${srcdir} ${destdir}_speed1.0
if [ ! -f $srcdir/utt2uniq ]; then
cat $srcdir/utt2spk | awk '{printf("sp1.0-%s %s\n", $1, $1);}' > ${destdir}_speed1.0/utt2uniq
else
cat $srcdir/utt2uniq | awk '{printf("sp1.0-%s %s\n", $1, $2);}' > ${destdir}_speed1.0/utt2uniq
fi
utils/data/combine_data.sh $destdir ${destdir}_speed1.0 ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1

rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 ${destdir}_speed1.0
else
cat $srcdir/utt2uniq | awk '{printf("sp1.0-%s %s\n", $1, $2);}' > ${destdir}_speed1.0/utt2uniq
utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
rm -r ${destdir}_speed0.9 ${destdir}_speed1.1
fi

utils/data/combine_data.sh $destdir ${destdir}_speed1.0 ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1

rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 ${destdir}_speed1.0

echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"
utils/validate_data_dir.sh --no-feats --no-text $destdir
if ! utils/validate_data_dir.sh --no-feats --no-text $destdir; then
echo "$0: Validation failed. If it is a sorting issue, try the option '--always-include-prefix true'."
exit 1
fi

exit 0