Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions egs/wsj/s5/utils/perturb_data_dir_speed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
# 2014 Tom Ko
# 2018 Emotech LTD (author: Pawel Swietojanski)
# Apache 2.0

# This script operates on a directory, such as in data/train/,
Expand All @@ -10,6 +11,8 @@
# spk2utt
# utt2spk
# text
# utt2dur
# reco2dur
#
# It generates the files which are used for perturbing the speed of the original data.

Expand Down Expand Up @@ -52,6 +55,7 @@ mkdir -p $destdir

cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map
cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map
if [ ! -f $srcdir/utt2uniq ]; then
cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq
else
Expand All @@ -65,8 +69,6 @@ cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/segments ]; then
# also apply the spk_prefix to the recording-ids.
cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map

utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \
utils/apply_map.pl -f 2 $destdir/reco_map | \
Expand All @@ -83,7 +85,6 @@ if [ -f $srcdir/segments ]; then
utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
fi

rm $destdir/reco_map 2>/dev/null
else # no segments->wav indexed by utterance.
if [ -f $srcdir/wav.scp ]; then
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
Expand All @@ -102,15 +103,23 @@ if [ -f $srcdir/spk2gender ]; then
utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi

#prepare speed-perturbed utt2dur
if [ ! -f $srcdir/utt2dur ]; then
# generate utt2dur if it does not exist in srcdir
utils/data/get_utt2dur.sh $srcdir
fi

cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \
awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/utt2dur

rm $destdir/spk_map $destdir/utt_map 2>/dev/null
#prepare speed-perturbed reco2dur
if [ ! -f $srcdir/reco2dur ]; then
# generate reco2dur if it does not exist in srcdir
utils/data/get_reco2dur.sh $srcdir
fi
cat $srcdir/reco2dur | utils/apply_map.pl -f 1 $destdir/reco_map | \
awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/reco2dur

rm $destdir/spk_map $destdir/utt_map $destdir/reco_map 2>/dev/null
echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir"

utils/validate_data_dir.sh --no-feats --no-text $destdir