chenzhehuai · chenzhehuai · Mar 22, 2018 · Mar 14, 2018 · Mar 14, 2018 · Mar 15, 2018
diff --git a/egs/aspire/s5/local/run_asr_segmentation.sh b/egs/aspire/s5/local/run_asr_segmentation.sh
@@ -48,7 +48,6 @@ reco_nj=40
 
 # test options
 test_nj=30
-test_stage=1
 
 . ./cmd.sh
 if [ -f ./path.sh ]; then . ./path.sh; fi
@@ -85,12 +84,10 @@ if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \
   exit 1
 fi
 
-data_id=$(basename $data_dir)
 whole_data_dir=${data_dir}_whole
-targets_dir=exp/segmentation${affix}/${data_id}_whole_combined_targets_sub3
+whole_data_id=$(basename $whole_data_dir)
 
 rvb_data_dir=${whole_data_dir}_rvb_hires
-rvb_targets_dir=${targets_dir}_rvb
 
 if [ $stage -le 0 ]; then
   utils/data/convert_data_dir_to_whole.sh $data_dir $whole_data_dir
@@ -101,26 +98,15 @@ fi
 ###############################################################################
 if [ $stage -le 1 ]; then
   steps/make_mfcc.sh --nj $reco_nj --cmd "$train_cmd"  --write-utt2num-frames true \
-    $whole_data_dir exp/make_mfcc/${data_id}_whole
-  steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/${data_id}_whole
+    $whole_data_dir exp/make_mfcc/${whole_data_id}
+  steps/compute_cmvn_stats.sh $whole_data_dir exp/make_mfcc/${whole_data_id}
   utils/fix_data_dir.sh $whole_data_dir
 fi
 
 ###############################################################################
-# Get feats for the manual segments
+# Prepare SAD targets for recordings
 ###############################################################################
-if [ $stage -le 2 ]; then
-  if [ ! -f ${data_dir}/segments ]; then
-    utils/data/get_segments_for_data.sh $data_dir > $data_dir/segments
-  fi
-  utils/data/subsegment_data_dir.sh $whole_data_dir ${data_dir}/segments ${data_dir}/tmp
-  cp $data_dir/tmp/feats.scp $data_dir
-
-  # Use recording as the "speaker". This is required by prepare_targets_gmm.sh script.
-  awk '{print $1" "$2}' $data_dir/segments > $data_dir/utt2spk
-  utils/utt2spk_to_spk2utt.pl $data_dir/utt2spk > $data_dir/spk2utt
-fi
-
+targets_dir=$dir/${whole_data_id}_combined_targets_sub3
 if [ $stage -le 3 ]; then
   steps/segmentation/prepare_targets_gmm.sh --stage $prepare_targets_stage \
     --train-cmd "$train_cmd" --decode-cmd "$decode_cmd" \
@@ -132,6 +118,7 @@ if [ $stage -le 3 ]; then
     $lang $data_dir $whole_data_dir $sat_model_dir $model_dir $dir
 fi
 
+rvb_targets_dir=${targets_dir}_rvb
 if [ $stage -le 4 ]; then
   # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
   if [ ! -f rirs_noises.zip ]; then
@@ -164,30 +151,29 @@ if [ $stage -le 4 ]; then
 fi
 
 if [ $stage -le 5 ]; then
-  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj \
+  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $reco_nj \
     ${rvb_data_dir}
   steps/compute_cmvn_stats.sh ${rvb_data_dir}
   utils/fix_data_dir.sh $rvb_data_dir
 fi
 
 if [ $stage -le 6 ]; then
-    rvb_targets_dirs=()
-    for i in `seq 1 $num_data_reps`; do
-      steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
-        $targets_dir ${targets_dir}_temp_$i || exit 1
-      rvb_targets_dirs+=(${targets_dir}_temp_$i)
-    done
-
-    steps/segmentation/combine_targets_dirs.sh \
-      $rvb_data_dir ${rvb_targets_dir} \
-      ${rvb_targets_dirs[@]} || exit 1;
-
-    rm -r ${rvb_targets_dirs[@]}
+  rvb_targets_dirs=()
+  for i in `seq 1 $num_data_reps`; do
+    steps/segmentation/copy_targets_dir.sh --utt-prefix "rev${i}_" \
+      $targets_dir ${targets_dir}_temp_$i || exit 1
+    rvb_targets_dirs+=(${targets_dir}_temp_$i)
+  done
+
+  steps/segmentation/combine_targets_dirs.sh \
+    $rvb_data_dir ${rvb_targets_dir} \
+    ${rvb_targets_dirs[@]} || exit 1;
+
+  rm -r ${rvb_targets_dirs[@]}
 fi
 
-sad_nnet_dir=exp/segmentation${affix}/tdnn_stats_asr_sad_1a
-#sad_nnet_dir=exp/segmentation${affix}/tdnn_lstm_asr_sad_1a
-#sad_opts="--extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"
+
+sad_nnet_dir=$dir/tdnn_stats_asr_sad_1a
 
 if [ $stage -le 7 ]; then
   # Train a STATS-pooling network for SAD
@@ -216,6 +202,13 @@ fi
 
 chain_dir=exp/chain/tdnn_lstm_1a
 
+# The context options in "sad_opts" must match the options used to train the
+# SAD network in "sad_nnet_dir".
+sad_opts="--extra-left-context 79 --extra-right-context 21 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"
+
+# For an LSTM SAD network, the options might be something like:
+# sad_opts="--extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"
+
 if [ $stage -le 9 ]; then
   # Use left and right context options that were used when training
   # the chain nnet

diff --git a/egs/babel/s5d/local/run_asr_segmentation.sh b/egs/babel/s5d/local/run_asr_segmentation.sh
@@ -35,11 +35,15 @@ merge_weights=1.0,0.1,0.5
 prepare_targets_stage=-10
 nstage=-10
 train_stage=-10
-test_stage=-10
 
 affix=_1a
 stage=-1
 nj=80
+reco_nj=40
+
+# test options
+test_nj=32
+test_stage=-10
 
 # Babel specific configuration. These two lines can be removed when adapting to other corpora.
 [ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
@@ -63,14 +67,14 @@ garbage_phones="<oov> <vns>"
 silence_phones="<sss> SIL"
 
 for p in $garbage_phones; do 
-  for affix in "" "_B" "_E" "_I" "_S"; do
-    echo "$p$affix"
+  for a in "" "_B" "_E" "_I" "_S"; do
+    echo "$p$a"
   done
 done > $dir/garbage_phones.txt
 
 for p in $silence_phones; do 
-  for affix in "" "_B" "_E" "_I" "_S"; do
-    echo "$p$affix"
+  for a in "" "_B" "_E" "_I" "_S"; do
+    echo "$p$a"
   done
 done > $dir/silence_phones.txt
 
@@ -81,6 +85,7 @@ if ! cat $dir/garbage_phones.txt $dir/silence_phones.txt | \
 fi
 
 whole_data_dir=${data_dir}_whole
+whole_data_id=$(basename $whole_data_dir)
 
 if [ $stage -le 0 ]; then
   utils/data/convert_data_dir_to_whole.sh $data_dir $whole_data_dir
@@ -91,39 +96,34 @@ fi
 ###############################################################################
 if [ $stage -le 1 ]; then
   if $use_pitch; then
-    steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $nj --write-utt2num-frames true \
+    steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $reco_nj --write-utt2num-frames true \
       ${whole_data_dir} || exit 1
   else
-    steps/make_plp.sh --cmd "$train_cmd" --nj $nj --write-utt2num-frames true \
+    steps/make_plp.sh --cmd "$train_cmd" --nj $reco_nj --write-utt2num-frames true \
       ${whole_data_dir} || exit 1
   fi
+  steps/compute_cmvn_stats.sh $whole_data_dir
+  utils/fix_data_dir.sh $whole_data_dir
 fi
 
 ###############################################################################
-# Get feats for the manual segments
+# Prepare SAD targets for recordings
 ###############################################################################
-if [ $stage -le 2 ]; then
-  if [ ! -f ${data_dir}/segments ]; then
-    utils/data/get_segments_for_data.sh $data_dir > $data_dir/segments
-  fi
-  utils/data/subsegment_data_dir.sh $whole_data_dir ${data_dir}/segments ${data_dir}/tmp
-  cp $data_dir/tmp/feats.scp $data_dir
-  awk '{print $1" "$2}' $data_dir/segments > $data_dir/utt2spk
-  utils/utt2spk_to_spk2utt.pl $data_dir/utt2spk > $data_dir/spk2utt
-fi
-
+targets_dir=$dir/${whole_data_id}_combined_targets_sub3
 if [ $stage -le 3 ]; then
   steps/segmentation/prepare_targets_gmm.sh --stage $prepare_targets_stage \
     --train-cmd "$train_cmd" --decode-cmd "$decode_cmd" \
-    --nj 80 --reco-nj 40 --lang-test $lang_test \
+    --nj $nj --reco-nj $reco_nj --lang-test $lang_test \
     --garbage-phones-list $dir/garbage_phones.txt \
     --silence-phones-list $dir/silence_phones.txt \
+    --merge-weights "$merge_weights" \
+    --graph-dir "$graph_dir" \
     $lang $data_dir $whole_data_dir $sat_model_dir $model_dir $dir
 fi
 
 if [ $stage -le 4 ]; then
   utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}_hires_bp
-  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_bp.conf --nj 40 \
+  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_bp.conf --nj $reco_nj \
     ${whole_data_dir}_hires_bp
   steps/compute_cmvn_stats.sh ${whole_data_dir}_hires_bp
 fi
@@ -132,7 +132,7 @@ if [ $stage -le 5 ]; then
   # Train a TDNN-LSTM network for SAD
   local/segmentation/tuning/train_lstm_asr_sad_1a.sh \
     --stage $nstage --train-stage $train_stage \
-    --targets-dir $dir \
+    --targets-dir $targets_dir \
     --data-dir ${whole_data_dir}_hires_bp
 fi
 
@@ -147,7 +147,7 @@ if [ $stage -le 6 ]; then
   steps/segmentation/detect_speech_activity.sh \
     --extra-left-context 70 --extra-right-context 0 --frames-per-chunk 150 \
     --extra-left-context-initial 0 --extra-right-context-final 0 \
-    --nj 32 --acwt 0.3 --stage $test_stage \
+    --nj $test_nj --acwt 0.3 --stage $test_stage \
     data/dev10h.pem \
     exp/segmentation_1a/tdnn_lstm_asr_sad_1a \
     mfcc_hires_bp \

diff --git a/egs/chime5/s5/local/train_lms_srilm.sh b/egs/chime5/s5/local/train_lms_srilm.sh
@@ -99,7 +99,7 @@ fi
 # Kaldi transcript files contain Utterance_ID as the first word; remove it
 # We also have to avoid skewing the LM by incorporating  the same sentences
 # from different channels
-sed -e "s/\.CH.//" -e "s/_.\-./_/" $train_text | sort -u | \
+sed -e "s/\.CH.//" -e "s/_.\-./_/" -e "s/NOLOCATION\(\.[LR]\)*-//" -e "s/U[0-9][0-9]_//" $train_text | sort -u | \
   perl -ane 'print join(" ", @F[1..$#F]) . "\n" if @F > 1' > $tgtdir/train.txt
 if (($?)); then
     echo "Failed to create $tgtdir/train.txt from $train_text"

diff --git a/egs/swbd/s5c/local/chain/run_tdnn.sh b/egs/swbd/s5c/local/chain/run_tdnn.sh
@@ -1 +1 @@
-tuning/run_tdnn_7n.sh
+tuning/run_tdnn_7o.sh