Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 28 additions & 35 deletions egs/aspire/s5/local/run_asr_segmentation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ reco_nj=40

# test options
test_nj=30
test_stage=1

. ./cmd.sh
if [ -f ./path.sh ]; then . ./path.sh; fi
Expand Down Expand Up @@ -85,12 +84,10 @@ if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \
exit 1
fi

data_id=$(basename $data_dir)
whole_data_dir=${data_dir}_whole
targets_dir=exp/segmentation${affix}/${data_id}_whole_combined_targets_sub3
whole_data_id=$(basename $whole_data_dir)

rvb_data_dir=${whole_data_dir}_rvb_hires
rvb_targets_dir=${targets_dir}_rvb

if [ $stage -le 0 ]; then
utils/data/convert_data_dir_to_whole.sh $data_dir $whole_data_dir
Expand All @@ -101,26 +98,15 @@ fi
###############################################################################
if [ $stage -le 1 ]; then
steps/make_mfcc.sh --nj $reco_nj --cmd "$train_cmd" --write-utt2num-frames true \
$whole_data_dir exp/make_mfcc/${data_id}_whole
steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/${data_id}_whole
$whole_data_dir exp/make_mfcc/${whole_data_id}
steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/${whole_data_id}
utils/fix_data_dir.sh $whole_data_dir
fi

###############################################################################
# Get feats for the manual segments
# Prepare SAD targets for recordings
###############################################################################
if [ $stage -le 2 ]; then
if [ ! -f ${data_dir}/segments ]; then
utils/data/get_segments_for_data.sh $data_dir > $data_dir/segments
fi
utils/data/subsegment_data_dir.sh $whole_data_dir ${data_dir}/segments ${data_dir}/tmp
cp $data_dir/tmp/feats.scp $data_dir

# Use recording as the "speaker". This is required by prepare_targets_gmm.sh script.
awk '{print $1" "$2}' $data_dir/segments > $data_dir/utt2spk
utils/utt2spk_to_spk2utt.pl $data_dir/utt2spk > $data_dir/spk2utt
fi

targets_dir=$dir/${whole_data_id}_combined_targets_sub3
if [ $stage -le 3 ]; then
steps/segmentation/prepare_targets_gmm.sh --stage $prepare_targets_stage \
--train-cmd "$train_cmd" --decode-cmd "$decode_cmd" \
Expand All @@ -132,6 +118,7 @@ if [ $stage -le 3 ]; then
$lang $data_dir $whole_data_dir $sat_model_dir $model_dir $dir
fi

rvb_targets_dir=${targets_dir}_rvb
if [ $stage -le 4 ]; then
# Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
if [ ! -f rirs_noises.zip ]; then
Expand Down Expand Up @@ -164,30 +151,29 @@ if [ $stage -le 4 ]; then
fi

if [ $stage -le 5 ]; then
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj \
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $reco_nj \
${rvb_data_dir}
steps/compute_cmvn_stats.sh ${rvb_data_dir}
utils/fix_data_dir.sh $rvb_data_dir
fi

if [ $stage -le 6 ]; then
rvb_targets_dirs=()
for i in `seq 1 $num_data_reps`; do
steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
$targets_dir ${targets_dir}_temp_$i || exit 1
rvb_targets_dirs+=(${targets_dir}_temp_$i)
done

steps/segmentation/combine_targets_dirs.sh \
$rvb_data_dir ${rvb_targets_dir} \
${rvb_targets_dirs[@]} || exit 1;

rm -r ${rvb_targets_dirs[@]}
rvb_targets_dirs=()
for i in `seq 1 $num_data_reps`; do
steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
$targets_dir ${targets_dir}_temp_$i || exit 1
rvb_targets_dirs+=(${targets_dir}_temp_$i)
done

steps/segmentation/combine_targets_dirs.sh \
$rvb_data_dir ${rvb_targets_dir} \
${rvb_targets_dirs[@]} || exit 1;

rm -r ${rvb_targets_dirs[@]}
fi

sad_nnet_dir=exp/segmentation${affix}/tdnn_stats_asr_sad_1a
#sad_nnet_dir=exp/segmentation${affix}/tdnn_lstm_asr_sad_1a
#sad_opts="--extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"

sad_nnet_dir=$dir/tdnn_stats_asr_sad_1a

if [ $stage -le 7 ]; then
# Train a STATS-pooling network for SAD
Expand Down Expand Up @@ -216,6 +202,13 @@ fi

chain_dir=exp/chain/tdnn_lstm_1a

# The context options in "sad_opts" must match the options used to train the
# SAD network in "sad_nnet_dir"
sad_opts="--extra-left-context 79 --extra-right-context 21 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"

# For LSTM SAD network, the options might be something like
# sad_opts="--extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"

if [ $stage -le 9 ]; then
# Use left and right context options that were used when training
# the chain nnet
Expand Down
44 changes: 22 additions & 22 deletions egs/babel/s5d/local/run_asr_segmentation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@ merge_weights=1.0,0.1,0.5
prepare_targets_stage=-10
nstage=-10
train_stage=-10
test_stage=-10

affix=_1a
stage=-1
nj=80
reco_nj=40

# test options
test_nj=32
test_stage=-10

# Babel specific configuration. These two lines can be removed when adapting to other corpora.
[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
Expand All @@ -63,14 +67,14 @@ garbage_phones="<oov> <vns>"
silence_phones="<sss> SIL"

for p in $garbage_phones; do
for affix in "" "_B" "_E" "_I" "_S"; do
echo "$p$affix"
for a in "" "_B" "_E" "_I" "_S"; do
echo "$p$a"
done
done > $dir/garbage_phones.txt

for p in $silence_phones; do
for affix in "" "_B" "_E" "_I" "_S"; do
echo "$p$affix"
for a in "" "_B" "_E" "_I" "_S"; do
echo "$p$a"
done
done > $dir/silence_phones.txt

Expand All @@ -81,6 +85,7 @@ if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \
fi

whole_data_dir=${data_dir}_whole
whole_data_id=$(basename $whole_data_dir)

if [ $stage -le 0 ]; then
utils/data/convert_data_dir_to_whole.sh $data_dir $whole_data_dir
Expand All @@ -91,39 +96,34 @@ fi
###############################################################################
if [ $stage -le 1 ]; then
if $use_pitch; then
steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $nj --write-utt2num-frames true \
steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $reco_nj --write-utt2num-frames true \
${whole_data_dir} || exit 1
else
steps/make_plp.sh --cmd "$train_cmd" --nj $nj --write-utt2num-frames true \
steps/make_plp.sh --cmd "$train_cmd" --nj $reco_nj --write-utt2num-frames true \
${whole_data_dir} || exit 1
fi
steps/compute_cmvn_stats.sh $whole_data_dir
utils/fix_data_dir.sh $whole_data_dir
fi

###############################################################################
# Get feats for the manual segments
# Prepare SAD targets for recordings
###############################################################################
if [ $stage -le 2 ]; then
if [ ! -f ${data_dir}/segments ]; then
utils/data/get_segments_for_data.sh $data_dir > $data_dir/segments
fi
utils/data/subsegment_data_dir.sh $whole_data_dir ${data_dir}/segments ${data_dir}/tmp
cp $data_dir/tmp/feats.scp $data_dir
awk '{print $1" "$2}' $data_dir/segments > $data_dir/utt2spk
utils/utt2spk_to_spk2utt.pl $data_dir/utt2spk > $data_dir/spk2utt
fi

targets_dir=$dir/${whole_data_id}_combined_targets_sub3
if [ $stage -le 3 ]; then
steps/segmentation/prepare_targets_gmm.sh --stage $prepare_targets_stage \
--train-cmd "$train_cmd" --decode-cmd "$decode_cmd" \
--nj 80 --reco-nj 40 --lang-test $lang_test \
--nj $nj --reco-nj $reco_nj --lang-test $lang_test \
--garbage-phones-list $dir/garbage_phones.txt \
--silence-phones-list $dir/silence_phones.txt \
--merge-weights "$merge_weights" \
--graph-dir "$graph_dir" \
$lang $data_dir $whole_data_dir $sat_model_dir $model_dir $dir
fi

if [ $stage -le 4 ]; then
utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}_hires_bp
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_bp.conf --nj 40 \
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_bp.conf --nj $reco_nj \
${whole_data_dir}_hires_bp
steps/compute_cmvn_stats.sh ${whole_data_dir}_hires_bp
fi
Expand All @@ -132,7 +132,7 @@ if [ $stage -le 5 ]; then
# Train a TDNN-LSTM network for SAD
local/segmentation/tuning/train_lstm_asr_sad_1a.sh \
--stage $nstage --train-stage $train_stage \
--targets-dir $dir \
--targets-dir $targets_dir \
--data-dir ${whole_data_dir}_hires_bp
fi

Expand All @@ -147,7 +147,7 @@ if [ $stage -le 6 ]; then
steps/segmentation/detect_speech_activity.sh \
--extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 \
--extra-left-context-initial 0 --extra-right-context-final 0 \
--nj 32 --acwt 0.3 --stage $test_stage \
--nj $test_nj --acwt 0.3 --stage $test_stage \
data/dev10h.pem \
exp/segmentation_1a/tdnn_lstm_asr_sad_1a \
mfcc_hires_bp \
Expand Down
2 changes: 1 addition & 1 deletion egs/chime5/s5/local/train_lms_srilm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ fi
# Kaldi transcript files contain Utterance_ID as the first word; remove it
# We also have to avoid skewing the LM by incorporating the same sentences
# from different channels
sed -e "s/\.CH.//" -e "s/_.\-./_/" $train_text | sort -u | \
sed -e "s/\.CH.//" -e "s/_.\-./_/" -e "s/NOLOCATION\(\.[LR]\)*-//" -e "s/U[0-9][0-9]_//" $train_text | sort -u | \
perl -ane 'print join(" ", @F[1..$#F]) . "\n" if @F > 1' > $tgtdir/train.txt
if (($?)); then
echo "Failed to create $tgtdir/train.txt from $train_text"
Expand Down
2 changes: 1 addition & 1 deletion egs/swbd/s5c/local/chain/run_tdnn.sh
Loading