diff --git a/egs/wsj/s5/utils/perturb_data_dir_speed.sh b/egs/wsj/s5/utils/perturb_data_dir_speed.sh index 667bd934f04..a50cdb04be4 100755 --- a/egs/wsj/s5/utils/perturb_data_dir_speed.sh +++ b/egs/wsj/s5/utils/perturb_data_dir_speed.sh @@ -2,6 +2,7 @@ # Copyright 2013 Johns Hopkins University (author: Daniel Povey) # 2014 Tom Ko +# 2018 Emotech LTD (author: Pawel Swietojanski) # Apache 2.0 # This script operates on a directory, such as in data/train/, @@ -10,6 +11,8 @@ # spk2utt # utt2spk # text +# utt2dur +# reco2dur # # It generates the files which are used for perturbing the speed of the original data. @@ -52,6 +55,7 @@ mkdir -p $destdir cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map +cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map if [ ! -f $srcdir/utt2uniq ]; then cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq else @@ -65,8 +69,6 @@ cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt if [ -f $srcdir/segments ]; then - # also apply the spk_prefix to the recording-ids. - cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \ utils/apply_map.pl -f 2 $destdir/reco_map | \ @@ -83,7 +85,6 @@ if [ -f $srcdir/segments ]; then utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel fi - rm $destdir/reco_map 2>/dev/null else # no segments->wav indexed by utterance. if [ -f $srcdir/wav.scp ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \ @@ -102,15 +103,23 @@ if [ -f $srcdir/spk2gender ]; then utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender fi +#prepare speed-perturbed utt2dur if [ ! -f $srcdir/utt2dur ]; then # generate utt2dur if it does not exist in srcdir utils/data/get_utt2dur.sh $srcdir fi - cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \ awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/utt2dur -rm $destdir/spk_map $destdir/utt_map 2>/dev/null +#prepare speed-perturbed reco2dur +if [ ! -f $srcdir/reco2dur ]; then + # generate reco2dur if it does not exist in srcdir + utils/data/get_reco2dur.sh $srcdir +fi +cat $srcdir/reco2dur | utils/apply_map.pl -f 1 $destdir/reco_map | \ + awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/reco2dur + +rm $destdir/spk_map $destdir/utt_map $destdir/reco_map 2>/dev/null echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir" utils/validate_data_dir.sh --no-feats --no-text $destdir