diff --git a/egs/wsj/s5/steps/make_fbank.sh b/egs/wsj/s5/steps/make_fbank.sh index 77c48be2e90..29153458f9b 100755 --- a/egs/wsj/s5/steps/make_fbank.sh +++ b/egs/wsj/s5/steps/make_fbank.sh @@ -1,6 +1,7 @@ #!/bin/bash -# Copyright 2012-2016 Karel Vesely Johns Hopkins University (Author: Daniel Povey) +# Copyright 2012-2016 Karel Vesely +# Copyright 2012-2016 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # To be run from .. (one directory up from here) # see ../run.sh for example @@ -10,23 +11,28 @@ nj=4 cmd=run.pl fbank_config=conf/fbank.conf compress=true -write_utt2num_frames=false # if true writes utt2num_frames +write_utt2num_frames=true # If true writes utt2num_frames. +write_utt2dur=true # End configuration section. -echo "$0 $@" # Print the command line for logging +echo "$0 $@" # Print the command line for logging. if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then - echo "Usage: $0 [options] [ [] ]"; - echo "e.g.: $0 data/train exp/make_fbank/train mfcc" - echo "Note: defaults to /log, and defaults to /data" - echo "Options: " - echo " --fbank-config # config passed to compute-fbank-feats " - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --write-utt2num-frames # If true, write utt2num_frames file." + cat >&2 < [ [] ] + e.g.: $0 data/train +Note: defaults to /log, and + defaults to /data +Options: + --fbank-config # config passed to compute-fbank-feats. + --nj # number of parallel jobs. + --cmd > # how to run jobs. + --write-utt2num-frames # If true, write utt2num_frames file. + --write-utt2dur # If true, write utt2dur file. +EOF exit 1; fi @@ -64,7 +70,7 @@ required="$scp $fbank_config" for f in $required; do if [ ! 
-f $f ]; then - echo "make_fbank.sh: no such file $f" + echo "$0: no such file $f" exit 1; fi done @@ -91,9 +97,15 @@ else write_num_frames_opt= fi +if $write_utt2dur; then + write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB" +else + write_utt2dur_opt= +fi + if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." - split_segments="" + split_segments= for n in $(seq $nj); do split_segments="$split_segments $logdir/segments.$n" done @@ -103,11 +115,11 @@ if [ -f $data/segments ]; then $cmd JOB=1:$nj $logdir/make_fbank_${name}.JOB.log \ extract-segments scp,p:$scp $logdir/segments.JOB ark:- \| \ - compute-fbank-feats $vtln_opts --verbose=2 --config=$fbank_config ark:- ark:- \| \ + compute-fbank-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$fbank_config ark:- ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$fbankdir/raw_fbank_$name.JOB.ark,$fbankdir/raw_fbank_$name.JOB.scp \ || exit 1; - else echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." split_scps="" @@ -118,39 +130,57 @@ else utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/make_fbank_${name}.JOB.log \ - compute-fbank-feats $vtln_opts --verbose=2 --config=$fbank_config scp,p:$logdir/wav.JOB.scp ark:- \| \ + compute-fbank-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$fbank_config scp,p:$logdir/wav.JOB.scp ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$fbankdir/raw_fbank_$name.JOB.ark,$fbankdir/raw_fbank_$name.JOB.scp \ || exit 1; - fi if [ -f $logdir/.error.$name ]; then - echo "Error producing fbank features for $name:" + echo "$0: Error producing filterbank features for $name:" tail $logdir/make_fbank_${name}.1.log exit 1; fi # concatenate the .scp files together. 
for n in $(seq $nj); do - cat $fbankdir/raw_fbank_$name.$n.scp || exit 1; -done > $data/feats.scp + cat $fbankdir/raw_fbank_$name.$n.scp || exit 1 +done > $data/feats.scp || exit 1 if $write_utt2num_frames; then for n in $(seq $nj); do - cat $logdir/utt2num_frames.$n || exit 1; + cat $logdir/utt2num_frames.$n || exit 1 done > $data/utt2num_frames || exit 1 - rm $logdir/utt2num_frames.* fi -rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null +if $write_utt2dur; then + for n in $(seq $nj); do + cat $logdir/utt2dur.$n || exit 1 + done > $data/utt2dur || exit 1 +fi + +# Store frame_shift and fbank_config along with features. +frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+)/) { + printf "%.3f", 0.001 * $1; exit; }' $fbank_config) +echo ${frame_shift:-'0.01'} > $data/frame_shift +mkdir -p $data/conf && cp $fbank_config $data/conf/fbank.conf || exit 1 + +rm $logdir/wav_${name}.*.scp $logdir/segments.* \ + $logdir/utt2num_frames.* $logdir/utt2dur.* 2>/dev/null -nf=`cat $data/feats.scp | wc -l` -nu=`cat $data/utt2spk | wc -l` +nf=$(wc -l < $data/feats.scp) +nu=$(wc -l < $data/utt2spk) if [ $nf -ne $nu ]; then - echo "It seems not all of the feature files were successfully ($nf != $nu);" - echo "consider using utils/fix_data_dir.sh $data" + echo "$0: It seems not all of the feature files were successfully procesed" \ + "($nf != $nu); consider using utils/fix_data_dir.sh $data" +fi + +if (( nf < nu - nu/20 )); then + echo "$0: Less than 95% the features were successfully generated."\ + "Probably a serious error." 
+ exit 1 fi -echo "Succeeded creating filterbank features for $name" +echo "$0: Succeeded creating filterbank features for $name" diff --git a/egs/wsj/s5/steps/make_fbank_pitch.sh b/egs/wsj/s5/steps/make_fbank_pitch.sh index b250128fd03..7f971df54ae 100755 --- a/egs/wsj/s5/steps/make_fbank_pitch.sh +++ b/egs/wsj/s5/steps/make_fbank_pitch.sh @@ -2,7 +2,7 @@ # Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech, # PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi) -# 2016 Johns Hopkins University (author: Daniel Povey) +# 2016 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # Combine filterbank and pitch features together # Note: This file is based on make_fbank.sh and make_pitch_kaldi.sh @@ -15,26 +15,31 @@ pitch_config=conf/pitch.conf pitch_postprocess_config= paste_length_tolerance=2 compress=true -write_utt2num_frames=false # if true writes utt2num_frames +write_utt2num_frames=true # If true writes utt2num_frames. +write_utt2dur=true # End configuration section. -echo "$0 $@" # Print the command line for logging +echo "$0 $@" # Print the command line for logging. if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then - echo "Usage: $0 [options] [ [] ]"; - echo "e.g.: $0 data/train exp/make_fbank/train mfcc" - echo "Note: defaults to /log, and defaults to /data" - echo "Options: " - echo " --fbank-config # config passed to compute-fbank-feats " - echo " --pitch-config # config passed to compute-kaldi-pitch-feats " - echo " --pitch-postprocess-config # config passed to process-kaldi-pitch-feats " - echo " --paste-length-tolerance # length tolerance passed to paste-feats" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --write-utt2num-frames # If true, write utt2num_frames file." 
+ cat >&2 < [ [] ] + e.g.: $0 data/train +Note: defaults to /log, and + defaults to /data +Options: + --fbank-config # config passed to compute-fbank-feats. + --pitch-config # config passed to compute-kaldi-pitch-feats. + --pitch-postprocess-config # config passed to process-kaldi-pitch-feats. + --paste-length-tolerance # length tolerance passed to paste-feats. + --nj # number of parallel jobs. + --cmd > # how to run jobs. + --write-utt2num-frames # If true, write utt2num_frames file. + --write-utt2dur # If true, write utt2dur file. +EOF exit 1; fi @@ -72,19 +77,19 @@ required="$scp $fbank_config $pitch_config" for f in $required; do if [ ! -f $f ]; then - echo "make_fbank_pitch.sh: no such file $f" + echo "$0: no such file $f" exit 1; fi done +utils/validate_data_dir.sh --no-text --no-feats $data || exit 1; + if [ ! -z "$pitch_postprocess_config" ]; then postprocess_config_opt="--config=$pitch_postprocess_config"; else postprocess_config_opt= fi -utils/validate_data_dir.sh --no-text --no-feats $data || exit 1; - if [ -f $data/spk2warp ]; then echo "$0 [info]: using VTLN warp factors from $data/spk2warp" vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk" @@ -105,9 +110,15 @@ else write_num_frames_opt= fi +if $write_utt2dur; then + write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB" +else + write_utt2dur_opt= +fi + if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." 
- split_segments="" + split_segments= for n in $(seq $nj); do split_segments="$split_segments $logdir/segments.$n" done @@ -115,61 +126,89 @@ if [ -f $data/segments ]; then utils/split_scp.pl $data/segments $split_segments || exit 1; rm $logdir/.error 2>/dev/null - fbank_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-fbank-feats $vtln_opts --verbose=2 --config=$fbank_config ark:- ark:- |" - pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" + fbank_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- |\ + compute-fbank-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$fbank_config ark:- ark:- |" + pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | \ + compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | \ + process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" $cmd JOB=1:$nj $logdir/make_fbank_pitch_${name}.JOB.log \ - paste-feats --length-tolerance=$paste_length_tolerance "$fbank_feats" "$pitch_feats" ark:- \| \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$fbank_feats" "$pitch_feats" ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$fbank_pitch_dir/raw_fbank_pitch_$name.JOB.ark,$fbank_pitch_dir/raw_fbank_pitch_$name.JOB.scp \ || exit 1; else echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
- split_scps="" + split_scps= for n in $(seq $nj); do - split_scps="$split_scps $logdir/wav.$n.scp" + split_scps="$split_scps $logdir/wav_${name}.$n.scp" done utils/split_scp.pl $scp $split_scps || exit 1; - fbank_feats="ark:compute-fbank-feats $vtln_opts --verbose=2 --config=$fbank_config scp,p:$logdir/wav.JOB.scp ark:- |" - pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" + fbank_feats="ark:compute-fbank-feats $vtln_opts $write_utt2dur_opt \ + --verbose=2 --config=$fbank_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" + pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 \ + --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | \ + process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" $cmd JOB=1:$nj $logdir/make_fbank_pitch_${name}.JOB.log \ - paste-feats --length-tolerance=$paste_length_tolerance "$fbank_feats" "$pitch_feats" ark:- \| \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$fbank_feats" "$pitch_feats" ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$fbank_pitch_dir/raw_fbank_pitch_$name.JOB.ark,$fbank_pitch_dir/raw_fbank_pitch_$name.JOB.scp \ || exit 1; - fi if [ -f $logdir/.error.$name ]; then - echo "Error producing fbank & pitch features for $name:" + echo "$0: Error producing filterbank and pitch features for $name:" tail $logdir/make_fbank_pitch_${name}.1.log exit 1; fi -# concatenate the .scp files together. +# Concatenate the .scp files together. 
for n in $(seq $nj); do - cat $fbank_pitch_dir/raw_fbank_pitch_$name.$n.scp || exit 1; -done > $data/feats.scp + cat $fbank_pitch_dir/raw_fbank_pitch_$name.$n.scp || exit 1 +done > $data/feats.scp || exit 1 if $write_utt2num_frames; then for n in $(seq $nj); do - cat $logdir/utt2num_frames.$n || exit 1; + cat $logdir/utt2num_frames.$n || exit 1 done > $data/utt2num_frames || exit 1 - rm $logdir/utt2num_frames.* fi -rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null +if $write_utt2dur; then + for n in $(seq $nj); do + cat $logdir/utt2dur.$n || exit 1 + done > $data/utt2dur || exit 1 +fi + +# Store frame_shift, fbank_config and pitch_config along with features. +frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+)/) { + printf "%.3f", 0.001 * $1; exit; }' $fbank_config) +echo ${frame_shift:-'0.01'} > $data/frame_shift +mkdir -p $data/conf && + cp $fbank_config $data/conf/fbank.conf && + cp $pitch_config $data/conf/pitch.conf || exit 1 -nf=`cat $data/feats.scp | wc -l` -nu=`cat $data/utt2spk | wc -l` +rm $logdir/wav_${name}.*.scp $logdir/segments.* \ + $logdir/utt2num_frames.* $logdir/utt2dur.* 2>/dev/null + +nf=$(wc -l < $data/feats.scp) +nu=$(wc -l < $data/utt2spk) if [ $nf -ne $nu ]; then - echo "It seems not all of the feature files were successfully processed ($nf != $nu);" - echo "consider using utils/fix_data_dir.sh $data" + echo "$0: It seems not all of the feature files were successfully procesed" \ + "($nf != $nu); consider using utils/fix_data_dir.sh $data" +fi + +if (( nf < nu - nu/20 )); then + echo "$0: Less than 95% the features were successfully generated."\ + "Probably a serious error." 
+ exit 1 fi -echo "Succeeded creating filterbank & pitch features for $name" +echo "$0: Succeeded creating filterbank and pitch features for $name" diff --git a/egs/wsj/s5/steps/make_mfcc.sh b/egs/wsj/s5/steps/make_mfcc.sh index 8514ce4e38d..37433f87dcd 100755 --- a/egs/wsj/s5/steps/make_mfcc.sh +++ b/egs/wsj/s5/steps/make_mfcc.sh @@ -10,23 +10,28 @@ nj=4 cmd=run.pl mfcc_config=conf/mfcc.conf compress=true -write_utt2num_frames=false # if true writes utt2num_frames +write_utt2num_frames=true # If true writes utt2num_frames. +write_utt2dur=true # End configuration section. -echo "$0 $@" # Print the command line for logging +echo "$0 $@" # Print the command line for logging. if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then - echo "Usage: $0 [options] [ [] ]"; - echo "e.g.: $0 data/train exp/make_mfcc/train mfcc" - echo "Note: defaults to /log, and defaults to /data" - echo "Options: " - echo " --mfcc-config # config passed to compute-mfcc-feats " - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --write-utt2num-frames # If true, write utt2num_frames file." + cat >&2 < [ [] ] + e.g.: $0 data/train +Note: defaults to /log, and + defaults to /data. +Options: + --mfcc-config # config passed to compute-mfcc-feats. + --nj # number of parallel jobs. + --cmd > # how to run jobs. + --write-utt2num-frames # If true, write utt2num_frames file. + --write-utt2dur # If true, write utt2dur file. +EOF exit 1; fi @@ -63,10 +68,11 @@ required="$scp $mfcc_config" for f in $required; do if [ ! 
-f $f ]; then - echo "make_mfcc.sh: no such file $f" + echo "$0: no such file $f" exit 1; fi done + utils/validate_data_dir.sh --no-text --no-feats $data || exit 1; if [ -f $data/spk2warp ]; then @@ -92,11 +98,16 @@ else write_num_frames_opt= fi +if $write_utt2dur; then + write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB" +else + write_utt2dur_opt= +fi if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." - split_segments="" + split_segments= for n in $(seq $nj); do split_segments="$split_segments $logdir/segments.$n" done @@ -106,14 +117,15 @@ if [ -f $data/segments ]; then $cmd JOB=1:$nj $logdir/make_mfcc_${name}.JOB.log \ extract-segments scp,p:$scp $logdir/segments.JOB ark:- \| \ - compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- \| \ + compute-mfcc-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$mfcc_config ark:- ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \ || exit 1; else echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." - split_scps="" + split_scps= for n in $(seq $nj); do split_scps="$split_scps $logdir/wav_${name}.$n.scp" done @@ -125,44 +137,58 @@ else # utterances that have bad wave data. 
$cmd JOB=1:$nj $logdir/make_mfcc_${name}.JOB.log \ - compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config \ - scp,p:$logdir/wav_${name}.JOB.scp ark:- \| \ - copy-feats $write_num_frames_opt --compress=$compress ark:- \ + compute-mfcc-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- \| \ + copy-feats $write_num_frames_opt --compress=$compress ark:- \ ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \ || exit 1; fi if [ -f $logdir/.error.$name ]; then - echo "Error producing mfcc features for $name:" + echo "$0: Error producing MFCC features for $name:" tail $logdir/make_mfcc_${name}.1.log exit 1; fi # concatenate the .scp files together. for n in $(seq $nj); do - cat $mfccdir/raw_mfcc_$name.$n.scp || exit 1; + cat $mfccdir/raw_mfcc_$name.$n.scp || exit 1 done > $data/feats.scp || exit 1 if $write_utt2num_frames; then for n in $(seq $nj); do - cat $logdir/utt2num_frames.$n || exit 1; + cat $logdir/utt2num_frames.$n || exit 1 done > $data/utt2num_frames || exit 1 - rm $logdir/utt2num_frames.* fi -rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null +if $write_utt2dur; then + for n in $(seq $nj); do + cat $logdir/utt2dur.$n || exit 1 + done > $data/utt2dur || exit 1 +fi -nf=`cat $data/feats.scp | wc -l` -nu=`cat $data/utt2spk | wc -l` +# Store frame_shift and mfcc_config along with features. 
+frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+)/) { + printf "%.3f", 0.001 * $1; exit; }' $mfcc_config) +echo ${frame_shift:-'0.01'} > $data/frame_shift +mkdir -p $data/conf && cp $mfcc_config $data/conf/mfcc.conf || exit 1 + +rm $logdir/wav_${name}.*.scp $logdir/segments.* \ + $logdir/utt2num_frames.* $logdir/utt2dur.* 2>/dev/null + +nf=$(wc -l < $data/feats.scp) +nu=$(wc -l < $data/utt2spk) if [ $nf -ne $nu ]; then - echo "It seems not all of the feature files were successfully processed ($nf != $nu);" - echo "consider using utils/fix_data_dir.sh $data" + echo "$0: It seems not all of the feature files were successfully procesed" \ + "($nf != $nu); consider using utils/fix_data_dir.sh $data" fi -if [ $nf -lt $[$nu - ($nu/20)] ]; then - echo "Less than 95% the features were successfully generated. Probably a serious error." - exit 1; +if (( nf < nu - nu/20 )); then + echo "$0: Less than 95% the features were successfully generated."\ + "Probably a serious error." + exit 1 fi -echo "Succeeded creating MFCC features for $name" + +echo "$0: Succeeded creating MFCC features for $name" diff --git a/egs/wsj/s5/steps/make_mfcc_pitch.sh b/egs/wsj/s5/steps/make_mfcc_pitch.sh index 98b670b82ae..dda31667d6a 100755 --- a/egs/wsj/s5/steps/make_mfcc_pitch.sh +++ b/egs/wsj/s5/steps/make_mfcc_pitch.sh @@ -1,7 +1,7 @@ #!/bin/bash -# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech, -# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi) +# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech, +# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi) # 2016 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # Combine MFCC and pitch features together @@ -15,26 +15,31 @@ pitch_config=conf/pitch.conf pitch_postprocess_config= paste_length_tolerance=2 compress=true -write_utt2num_frames=false # if true writes utt2num_frames +write_utt2num_frames=true # If true writes utt2num_frames. 
+write_utt2dur=true # End configuration section. -echo "$0 $@" # Print the command line for logging +echo "$0 $@" # Print the command line for logging. if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then - echo "Usage: $0 [options] [ [] ]"; - echo "e.g.: $0 data/train exp/make_mfcc/train mfcc" - echo "Note: defaults to /log, and defaults to /data" - echo "Options: " - echo " --mfcc-config # config passed to compute-mfcc-feats " - echo " --pitch-config # config passed to compute-kaldi-pitch-feats " - echo " --pitch-postprocess-config # config passed to process-kaldi-pitch-feats " - echo " --paste-length-tolerance # length tolerance passed to paste-feats" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --write-utt2num-frames # If true, write utt2num_frames file." + cat >&2 < [ [] ] + e.g.: $0 data/train +Note: defaults to /log, and + defaults to /data +Options: + --mfcc-config # config passed to compute-mfcc-feats. + --pitch-config # config passed to compute-kaldi-pitch-feats. + --pitch-postprocess-config # config passed to process-kaldi-pitch-feats. + --paste-length-tolerance # length tolerance passed to paste-feats. + --nj # number of parallel jobs. + --cmd > # how to run jobs. + --write-utt2num-frames # If true, write utt2num_frames file. + --write-utt2dur # If true, write utt2dur file. +EOF exit 1; fi @@ -72,10 +77,11 @@ required="$scp $mfcc_config $pitch_config" for f in $required; do if [ ! -f $f ]; then - echo "make_mfcc_pitch.sh: no such file $f" + echo "$0: no such file $f" exit 1; fi done + utils/validate_data_dir.sh --no-text --no-feats $data || exit 1; if [ ! 
-z "$pitch_postprocess_config" ]; then @@ -104,9 +110,15 @@ else write_num_frames_opt= fi +if $write_utt2dur; then + write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB" +else + write_utt2dur_opt= +fi + if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." - split_segments="" + split_segments= for n in $(seq $nj); do split_segments="$split_segments $logdir/segments.$n" done @@ -114,66 +126,89 @@ if [ -f $data/segments ]; then utils/split_scp.pl $data/segments $split_segments || exit 1; rm $logdir/.error 2>/dev/null - mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |" - pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" + mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | \ + compute-mfcc-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$mfcc_config ark:- ark:- |" + pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | \ + compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | \ + process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" $cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \ - paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$mfcc_feats" "$pitch_feats" ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.scp \ || exit 1; else echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
- split_scps="" + split_scps= for n in $(seq $nj); do split_scps="$split_scps $logdir/wav_${name}.$n.scp" done utils/split_scp.pl $scp $split_scps || exit 1; - mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" - pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" + mfcc_feats="ark:compute-mfcc-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" + pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 \ + --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | \ + process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" $cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \ - paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$mfcc_feats" "$pitch_feats" ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.scp \ || exit 1; - fi if [ -f $logdir/.error.$name ]; then - echo "Error producing mfcc & pitch features for $name:" + echo "$0: Error producing MFCC and pitch features for $name:" tail $logdir/make_mfcc_pitch_${name}.1.log exit 1; fi -# concatenate the .scp files together. +# Concatenate the .scp files together. 
for n in $(seq $nj); do cat $mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.scp || exit 1; -done > $data/feats.scp +done > $data/feats.scp || exit 1 if $write_utt2num_frames; then for n in $(seq $nj); do - cat $logdir/utt2num_frames.$n || exit 1; + cat $logdir/utt2num_frames.$n || exit 1 done > $data/utt2num_frames || exit 1 - rm $logdir/utt2num_frames.* fi -rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null +if $write_utt2dur; then + for n in $(seq $nj); do + cat $logdir/utt2dur.$n || exit 1 + done > $data/utt2dur || exit 1 +fi + +# Store frame_shift, mfcc_config and pitch_config along with features. +frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+)/) { + printf "%.3f", 0.001 * $1; exit; }' $mfcc_config) +echo ${frame_shift:-'0.01'} > $data/frame_shift +mkdir -p $data/conf && + cp $mfcc_config $data/conf/mfcc.conf && + cp $pitch_config $data/conf/pitch.conf || exit 1 -nf=`cat $data/feats.scp | wc -l` -nu=`cat $data/utt2spk | wc -l` +rm $logdir/wav_${name}.*.scp $logdir/segments.* \ + $logdir/utt2num_frames.* $logdir/utt2dur.* 2>/dev/null + +nf=$(wc -l < $data/feats.scp) +nu=$(wc -l < $data/utt2spk) if [ $nf -ne $nu ]; then - echo "It seems not all of the feature files were successfully processed ($nf != $nu);" - echo "consider using utils/fix_data_dir.sh $data" + echo "$0: It seems not all of the feature files were successfully procesed" \ + "($nf != $nu); consider using utils/fix_data_dir.sh $data" fi -if [ $nf -lt $[$nu - ($nu/20)] ]; then - echo "Less than 95% the features were successfully generated. Probably a serious error." - exit 1; +if (( nf < nu - nu/20 )); then + echo "$0: Less than 95% the features were successfully generated."\ + "Probably a serious error." 
+ exit 1 fi -echo "Succeeded creating MFCC & Pitch features for $name" +echo "$0: Succeeded creating MFCC and pitch features for $name" diff --git a/egs/wsj/s5/steps/make_mfcc_pitch_online.sh b/egs/wsj/s5/steps/make_mfcc_pitch_online.sh index df51057a00b..001c1e4c6f4 100755 --- a/egs/wsj/s5/steps/make_mfcc_pitch_online.sh +++ b/egs/wsj/s5/steps/make_mfcc_pitch_online.sh @@ -1,7 +1,7 @@ #!/bin/bash -# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech, -# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi) +# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech, +# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi) # 2014-2016 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # Combine MFCC and online-pitch features together @@ -14,25 +14,30 @@ mfcc_config=conf/mfcc.conf online_pitch_config=conf/online_pitch.conf paste_length_tolerance=2 compress=true +write_utt2num_frames=true # If true writes utt2num_frames. +write_utt2dur=true # End configuration section. -echo "$0 $@" # Print the command line for logging +echo "$0 $@" # Print the command line for logging. if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then - echo "Usage: $0 [options] [ [] ]"; - echo "e.g.: $0 data/train exp/make_mfcc/train mfcc" - echo "Note: defaults to /log, and defaults to /data" - echo "Options: " - echo " --mfcc-config # config passed to compute-mfcc-feats, default " - echo " # is conf/mfcc.conf" - echo " --online-pitch-config # config passed to compute-and-process-kaldi-pitch-feats, " - echo " # default is conf/online_pitch.conf" - echo " --paste-length-tolerance # length tolerance passed to paste-feats" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ cat >&2 < [ [] ] + e.g.: $0 data/train +Note: defaults to /log, and + defaults to /data +Options: + --mfcc-config # config passed to compute-mfcc-feats [conf/mfcc.conf] + --online-pitch-config # config passed to compute-and-process-kaldi-pitch-feats [conf/online_pitch.conf] + --paste-length-tolerance # length tolerance passed to paste-feats. + --nj # number of parallel jobs. + --cmd > # how to run jobs. + --write-utt2num-frames # If true, write utt2num_frames file. + --write-utt2dur # If true, write utt2dur file. +EOF exit 1; fi @@ -90,9 +95,21 @@ for n in $(seq $nj); do utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_online_pitch_$name.$n.ark done +if $write_utt2num_frames; then + write_num_frames_opt="--write-num-frames=ark,t:$logdir/utt2num_frames.JOB" +else + write_num_frames_opt= +fi + +if $write_utt2dur; then + write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB" +else + write_utt2dur_opt= +fi + if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." 
- split_segments="" + split_segments= for n in $(seq $nj); do split_segments="$split_segments $logdir/segments.$n" done @@ -100,58 +117,88 @@ if [ -f $data/segments ]; then utils/split_scp.pl $data/segments $split_segments || exit 1; rm $logdir/.error 2>/dev/null - mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |" - pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-and-process-kaldi-pitch-feats --verbose=2 --config=$online_pitch_config ark:- ark:- |" + mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | \ + compute-mfcc-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$mfcc_config ark:- ark:- |" + pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | \ + compute-and-process-kaldi-pitch-feats --verbose=2 \ + --config=$online_pitch_config ark:- ark:- |" $cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \ - paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \ - copy-feats --compress=$compress ark:- \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$mfcc_feats" "$pitch_feats" ark:- \| \ + copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.JOB.scp \ || exit 1; else echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
- split_scps="" + split_scps= for n in $(seq $nj); do split_scps="$split_scps $logdir/wav_${name}.$n.scp" done utils/split_scp.pl $scp $split_scps || exit 1; - mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" - pitch_feats="ark,s,cs:compute-and-process-kaldi-pitch-feats --verbose=2 --config=$online_pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" + mfcc_feats="ark:compute-mfcc-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" + pitch_feats="ark,s,cs:compute-and-process-kaldi-pitch-feats --verbose=2 \ + --config=$online_pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" $cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \ - paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \ - copy-feats --compress=$compress ark:- \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$mfcc_feats" "$pitch_feats" ark:- \| \ + copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.JOB.scp \ || exit 1; fi if [ -f $logdir/.error.$name ]; then - echo "Error producing mfcc & pitch features for $name:" + echo "$0: Error producing MFCC and online-pitch features for $name:" tail $logdir/make_mfcc_pitch_${name}.1.log exit 1; fi -# concatenate the .scp files together. +# Concatenate the .scp files together. 
for n in $(seq $nj); do - cat $mfcc_pitch_dir/raw_mfcc_online_pitch_$name.$n.scp || exit 1; -done > $data/feats.scp + cat $mfcc_pitch_dir/raw_mfcc_online_pitch_$name.$n.scp || exit 1 +done > $data/feats.scp || exit 1 + +if $write_utt2num_frames; then + for n in $(seq $nj); do + cat $logdir/utt2num_frames.$n || exit 1 + done > $data/utt2num_frames || exit 1 +fi -rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null +if $write_utt2dur; then + for n in $(seq $nj); do + cat $logdir/utt2dur.$n || exit 1 + done > $data/utt2dur || exit 1 +fi -nf=`cat $data/feats.scp | wc -l` -nu=`cat $data/utt2spk | wc -l` +# Store frame_shift, mfcc_config and pitch_config_online along with features. +frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+)/) { + printf "%.3f", 0.001 * $1; exit; }' $mfcc_config) +echo ${frame_shift:-'0.01'} > $data/frame_shift +mkdir -p $data/conf && + cp $mfcc_config $data/conf/mfcc.conf && + cp $online_pitch_config $data/conf/online_pitch.conf || exit 1 + +rm $logdir/wav_${name}.*.scp $logdir/segments.* \ + $logdir/utt2num_frames.* $logdir/utt2dur.* 2>/dev/null + +nf=$(wc -l < $data/feats.scp) +nu=$(wc -l < $data/utt2spk) if [ $nf -ne $nu ]; then - echo "It seems not all of the feature files were successfully processed ($nf != $nu);" - echo "consider using utils/fix_data_dir.sh $data" + echo "$0: It seems not all of the feature files were successfully processed" \ + "($nf != $nu); consider using utils/fix_data_dir.sh $data" fi -if [ $nf -lt $[$nu - ($nu/20)] ]; then - echo "Less than 95% the features were successfully generated. Probably a serious error." - exit 1; +if (( nf < nu - nu/20 )); then + echo "$0: Less than 95% the features were successfully generated."\ + "Probably a serious error." 
+ exit 1 fi -echo "Succeeded creating MFCC & online-pitch features for $name" +echo "$0: Succeeded creating MFCC and online-pitch features for $name" diff --git a/egs/wsj/s5/steps/make_plp.sh b/egs/wsj/s5/steps/make_plp.sh index 85b4a02fbb6..c4a987aaeeb 100755 --- a/egs/wsj/s5/steps/make_plp.sh +++ b/egs/wsj/s5/steps/make_plp.sh @@ -10,22 +10,28 @@ nj=4 cmd=run.pl plp_config=conf/plp.conf compress=true -write_utt2num_frames=false # if true writes utt2num_frames +write_utt2num_frames=true # If true writes utt2num_frames. +write_utt2dur=true # End configuration section. -echo "$0 $@" # Print the command line for logging +echo "$0 $@" # Print the command line for logging. if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then - echo "Usage: $0 [options] [ [] ]"; - echo "e.g.: $0 data/train exp/make_plp/train mfcc" - echo "Note: defaults to /log, and defaults to /data" - echo "Options: " - echo " --plp-config # config passed to compute-plp-feats " - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + cat >&2 < [ [] ] + e.g.: $0 data/train +Note: defaults to /log, and + defaults to /data +Options: + --plp-config # config passed to compute-plp-feats. + --nj # number of parallel jobs. + --cmd > # how to run jobs. + --write-utt2num-frames # If true, write utt2num_frames file. + --write-utt2dur # If true, write utt2dur file. +EOF exit 1; fi @@ -62,7 +68,7 @@ required="$scp $plp_config" for f in $required; do if [ ! 
-f $f ]; then - echo "make_plp.sh: no such file $f" + echo "$0: no such file $f" exit 1; fi done @@ -74,6 +80,8 @@ if [ -f $data/spk2warp ]; then elif [ -f $data/utt2warp ]; then echo "$0 [info]: using VTLN warp factors from $data/utt2warp" vtln_opts="--vtln-map=ark:$data/utt2warp" +else + vtln_opts= fi for n in $(seq $nj); do @@ -88,9 +96,15 @@ else write_num_frames_opt= fi +if $write_utt2dur; then + write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB" +else + write_utt2dur_opt= +fi + if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." - split_segments="" + split_segments= for n in $(seq $nj); do split_segments="$split_segments $logdir/segments.$n" done @@ -100,14 +114,15 @@ if [ -f $data/segments ]; then $cmd JOB=1:$nj $logdir/make_plp_${name}.JOB.log \ extract-segments scp,p:$scp $logdir/segments.JOB ark:- \| \ - compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config ark:- ark:- \| \ + compute-plp-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$plp_config ark:- ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \ || exit 1; else echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
- split_scps="" + split_scps= for n in $(seq $nj); do split_scps="$split_scps $logdir/wav_${name}.$n.scp" done @@ -115,7 +130,8 @@ else utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/make_plp_${name}.JOB.log \ - compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- \| \ + compute-plp-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \ || exit 1; @@ -124,34 +140,48 @@ fi if [ -f $logdir/.error.$name ]; then - echo "Error producing plp features for $name:" + echo "$0: Error producing PLP features for $name:" tail $logdir/make_plp_${name}.1.log exit 1; fi # concatenate the .scp files together. for n in $(seq $nj); do - cat $plpdir/raw_plp_$name.$n.scp || exit 1; + cat $plpdir/raw_plp_$name.$n.scp || exit 1 done > $data/feats.scp if $write_utt2num_frames; then for n in $(seq $nj); do - cat $logdir/utt2num_frames.$n || exit 1; + cat $logdir/utt2num_frames.$n || exit 1 done > $data/utt2num_frames || exit 1 - rm $logdir/utt2num_frames.* fi -rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null +if $write_utt2dur; then + for n in $(seq $nj); do + cat $logdir/utt2dur.$n || exit 1 + done > $data/utt2dur || exit 1 +fi + +# Store frame_shift and plp_config along with features. 
+frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+)/) { + printf "%.3f", 0.001 * $1; exit; }' $plp_config) +echo ${frame_shift:-'0.01'} > $data/frame_shift +mkdir -p $data/conf && cp $plp_config $data/conf/plp.conf || exit 1 + +rm $logdir/wav_${name}.*.scp $logdir/segments.* \ + $logdir/utt2num_frames.* $logdir/utt2dur.* 2>/dev/null -nf=`cat $data/feats.scp | wc -l` -nu=`cat $data/utt2spk | wc -l` +nf=$(wc -l < $data/feats.scp) +nu=$(wc -l < $data/utt2spk) if [ $nf -ne $nu ]; then - echo "It seems not all of the feature files were successfully ($nf != $nu);" - echo "consider using utils/fix_data_dir.sh $data" + echo "$0: It seems not all of the feature files were successfully processed" \ + "($nf != $nu); consider using utils/fix_data_dir.sh $data" fi -if [ $nf -lt $[$nu - ($nu/20)] ]; then - echo "Less than 95% the features were successfully generated. Probably a serious error." - exit 1; + +if (( nf < nu - nu/20 )); then + echo "$0: Less than 95% the features were successfully generated."\ + "Probably a serious error." 
+ exit 1 fi -echo "Succeeded creating PLP features for $name" +echo "$0: Succeeded creating PLP features for $name" diff --git a/egs/wsj/s5/steps/make_plp_pitch.sh b/egs/wsj/s5/steps/make_plp_pitch.sh index 40ddd314f6c..9f565d8a5bf 100755 --- a/egs/wsj/s5/steps/make_plp_pitch.sh +++ b/egs/wsj/s5/steps/make_plp_pitch.sh @@ -1,7 +1,7 @@ #!/bin/bash -# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech, -# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi) +# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech, +# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi) # 2016 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # Combine PLP and pitch features together @@ -15,25 +15,31 @@ pitch_config=conf/pitch.conf pitch_postprocess_config= paste_length_tolerance=2 compress=true -write_utt2num_frames=false # if true writes utt2num_frames +write_utt2num_frames=true # If true writes utt2num_frames. +write_utt2dur=true # End configuration section. -echo "$0 $@" # Print the command line for logging +echo "$0 $@" # Print the command line for logging. if [ -f ./path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then - echo "Usage: $0 [options] [ [] ]"; - echo "e.g.: $0 data/train exp/make_plp/train mfcc" - echo "Note: defaults to /log, and defaults to /data" - echo "Options: " - echo " --plp-config # config passed to compute-plp-feats " - echo " --pitch-config # config passed to compute-kaldi-pitch-feats " - echo " --pitch-postprocess-config # config passed to process-kaldi-pitch-feats " - echo " --paste-length-tolerance # length tolerance passed to paste-feats" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + cat >&2 < [ [] ] + e.g.: $0 data/train +Note: defaults to /log, and + defaults to /data +Options: + --plp-config # config passed to compute-plp-feats. 
+ --pitch-config # config passed to compute-kaldi-pitch-feats. + --pitch-postprocess-config # config passed to process-kaldi-pitch-feats. + --paste-length-tolerance # length tolerance passed to paste-feats. + --nj # number of parallel jobs. + --cmd > # how to run jobs. + --write-utt2num-frames # If true, write utt2num_frames file. + --write-utt2dur # If true, write utt2dur file. +EOF exit 1; fi @@ -70,7 +76,7 @@ required="$scp $plp_config $pitch_config" for f in $required; do if [ ! -f $f ]; then - echo "make_plp_pitch.sh: no such file $f" + echo "$0: no such file $f" exit 1; fi done @@ -102,9 +108,15 @@ else write_num_frames_opt= fi +if $write_utt2dur; then + write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB" +else + write_utt2dur_opt= +fi + if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." - split_segments="" + split_segments= for n in $(seq $nj); do split_segments="$split_segments $logdir/segments.$n" done @@ -112,67 +124,89 @@ if [ -f $data/segments ]; then utils/split_scp.pl $data/segments $split_segments || exit 1; rm $logdir/.error 2>/dev/null - plp_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config ark:- ark:- |" - pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" + plp_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | \ + compute-plp-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$plp_config ark:- ark:- |" + pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | \ + compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | \ + process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" $cmd JOB=1:$nj $logdir/make_plp_pitch_${name}.JOB.log \ - paste-feats 
--length-tolerance=$paste_length_tolerance "$plp_feats" "$pitch_feats" ark:- \| \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$plp_feats" "$pitch_feats" ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$plp_pitch_dir/raw_plp_pitch_$name.JOB.ark,$plp_pitch_dir/raw_plp_pitch_$name.JOB.scp \ || exit 1; else echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." - split_scps="" + split_scps= for n in $(seq $nj); do split_scps="$split_scps $logdir/wav_${name}.$n.scp" done utils/split_scp.pl $scp $split_scps || exit 1; - - plp_feats="ark:compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" - pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" + plp_feats="ark:compute-plp-feats $vtln_opts $write_utt2dur_opt --verbose=2 \ + --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |" + pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 \ + --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | \ + process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |" $cmd JOB=1:$nj $logdir/make_plp_pitch_${name}.JOB.log \ - paste-feats --length-tolerance=$paste_length_tolerance "$plp_feats" "$pitch_feats" ark:- \| \ + paste-feats --length-tolerance=$paste_length_tolerance \ + "$plp_feats" "$pitch_feats" ark:- \| \ copy-feats --compress=$compress $write_num_frames_opt ark:- \ ark,scp:$plp_pitch_dir/raw_plp_pitch_$name.JOB.ark,$plp_pitch_dir/raw_plp_pitch_$name.JOB.scp \ || exit 1; - fi if [ -f $logdir/.error.$name ]; then - echo "Error producing plp & pitch features for $name:" + echo "$0: Error producing PLP and pitch features for $name:" tail $logdir/make_plp_pitch_${name}.1.log exit 1; fi -# concatenate the .scp files together. +# Concatenate the .scp files together. 
for n in $(seq $nj); do - cat $plp_pitch_dir/raw_plp_pitch_$name.$n.scp || exit 1; -done > $data/feats.scp + cat $plp_pitch_dir/raw_plp_pitch_$name.$n.scp || exit 1 +done > $data/feats.scp || exit 1 if $write_utt2num_frames; then for n in $(seq $nj); do - cat $logdir/utt2num_frames.$n || exit 1; + cat $logdir/utt2num_frames.$n || exit 1 done > $data/utt2num_frames || exit 1 - rm $logdir/utt2num_frames.* fi -rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null +if $write_utt2dur; then + for n in $(seq $nj); do + cat $logdir/utt2dur.$n || exit 1 + done > $data/utt2dur || exit 1 +fi + +# Store frame_shift, plp_config and pitch_config along with features. +frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+)/) { + printf "%.3f", 0.001 * $1; exit; }' $plp_config) +echo ${frame_shift:-'0.01'} > $data/frame_shift +mkdir -p $data/conf && + cp $plp_config $data/conf/plp.conf && + cp $pitch_config $data/conf/pitch.conf || exit 1 -nf=`cat $data/feats.scp | wc -l` -nu=`cat $data/utt2spk | wc -l` +rm $logdir/wav_${name}.*.scp $logdir/segments.* \ + $logdir/utt2num_frames.* $logdir/utt2dur.* 2>/dev/null + +nf=$(wc -l < $data/feats.scp) +nu=$(wc -l < $data/utt2spk) if [ $nf -ne $nu ]; then - echo "It seems not all of the feature files were successfully processed ($nf != $nu);" - echo "consider using utils/fix_data_dir.sh $data" + echo "$0: It seems not all of the feature files were successfully processed" \ + "($nf != $nu); consider using utils/fix_data_dir.sh $data" fi -if [ $nf -lt $[$nu - ($nu/20)] ]; then - echo "Less than 95% the features were successfully generated. Probably a serious error." - exit 1; +if (( nf < nu - nu/20 )); then + echo "$0: Less than 95% the features were successfully generated."\ + "Probably a serious error." 
+ exit 1 fi -echo "Succeeded creating PLP & Pitch features for $name" +echo "$0: Succeeded creating PLP and pitch features for $name" diff --git a/src/featbin/compute-fbank-feats.cc b/src/featbin/compute-fbank-feats.cc index 41df621d62d..e52b30bafb6 100644 --- a/src/featbin/compute-fbank-feats.cc +++ b/src/featbin/compute-fbank-feats.cc @@ -19,9 +19,9 @@ // limitations under the License. #include "base/kaldi-common.h" -#include "util/common-utils.h" #include "feat/feature-fbank.h" #include "feat/wave-reader.h" +#include "util/common-utils.h" int main(int argc, char *argv[]) { @@ -29,35 +29,42 @@ int main(int argc, char *argv[]) { using namespace kaldi; const char *usage = "Create Mel-filter bank (FBANK) feature files.\n" - "Usage: compute-fbank-feats [options...] \n"; + "Usage: compute-fbank-feats [options...] " + "\n"; - // construct all the global objects + // Construct all the global objects. ParseOptions po(usage); FbankOptions fbank_opts; + // Define defaults for global options. bool subtract_mean = false; BaseFloat vtln_warp = 1.0; std::string vtln_map_rspecifier; std::string utt2spk_rspecifier; int32 channel = -1; BaseFloat min_duration = 0.0; - // Define defaults for gobal options std::string output_format = "kaldi"; + std::string utt2dur_wspecifier; - // Register the option struct + // Register the option struct. fbank_opts.Register(&po); - // Register the options - po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]"); - po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]; not recommended to do it this way. 
"); - po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)"); - po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)"); - po.Register("channel", &channel, "Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)"); - po.Register("min-duration", &min_duration, "Minimum duration of segments to process (in seconds)."); - - // OPTION PARSING .......................................................... - // - - // parse options (+filling the registered variables) + // Register the options. + po.Register("output-format", &output_format, + "Format of the output files [kaldi, htk]"); + po.Register("subtract-mean", &subtract_mean, "Subtract mean of each " + "feature file [CMS]; not recommended to do it this way. "); + po.Register("vtln-warp", &vtln_warp, + "Vtln warp factor (only applicable if vtln-map not specified)"); + po.Register("vtln-map", &vtln_map_rspecifier,"Map from utterance or " + "speaker-id to vtln warp factor (rspecifier)"); + po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map " + "(if doing VTLN and you have warps per speaker)"); + po.Register("channel", &channel, "Channel to extract (-1 -> expect mono, " + "0 -> left, 1 -> right)"); + po.Register("min-duration", &min_duration, "Minimum duration of segments " + "to process (in seconds)."); + po.Register("write-utt2dur", &utt2dur_wspecifier, "Wspecifier to write " + "duration of each utterance in seconds, e.g. 
'ark,t:utt2dur'."); + po.Read(argc, argv); if (po.NumArgs() != 2) { @@ -71,16 +78,16 @@ int main(int argc, char *argv[]) { Fbank fbank(fbank_opts); + if (utt2spk_rspecifier != "" && vtln_map_rspecifier == "") + KALDI_ERR << ("The --utt2spk option is only needed if " + "the --vtln-map option is used."); + RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, + utt2spk_rspecifier); + SequentialTableReader reader(wav_rspecifier); BaseFloatMatrixWriter kaldi_writer; // typedef to TableWriter. TableWriter htk_writer; - if (utt2spk_rspecifier != "") - KALDI_ASSERT(vtln_map_rspecifier != "" && "the utt2spk option is only " - "needed if the vtln-map option is used."); - RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, - utt2spk_rspecifier); - if (output_format == "kaldi") { if (!kaldi_writer.Open(output_wspecifier)) KALDI_ERR << "Could not initialize output with wspecifier " @@ -93,6 +100,8 @@ int main(int argc, char *argv[]) { KALDI_ERR << "Invalid output_format string " << output_format; } + DoubleWriter utt2dur_writer(utt2dur_wspecifier); + int32 num_utts = 0, num_success = 0; for (; !reader.Done(); reader.Next()) { num_utts++; @@ -105,7 +114,7 @@ int main(int argc, char *argv[]) { } int32 num_chan = wave_data.Data().NumRows(), this_chan = channel; { // This block works out the channel (0=left, 1=right...) - KALDI_ASSERT(num_chan > 0); // should have been caught in + KALDI_ASSERT(num_chan > 0); // This should have been caught in // reading code if no channels. if (channel == -1) { this_chan = 0; @@ -136,10 +145,10 @@ int main(int argc, char *argv[]) { SubVector waveform(wave_data.Data(), this_chan); Matrix features; try { - fbank.ComputeFeatures(waveform, wave_data.SampFreq(), vtln_warp_local, &features); + fbank.ComputeFeatures(waveform, wave_data.SampFreq(), + vtln_warp_local, &features); } catch (...) 
{ - KALDI_WARN << "Failed to compute features for utterance " - << utt; + KALDI_WARN << "Failed to compute features for utterance " << utt; continue; } if (subtract_mean) { @@ -165,6 +174,9 @@ int main(int argc, char *argv[]) { p.second = header; htk_writer.Write(utt, p); } + if (utt2dur_writer.IsOpen()) { + utt2dur_writer.Write(utt, wave_data.Duration()); + } if (num_utts % 10 == 0) KALDI_LOG << "Processed " << num_utts << " utterances"; KALDI_VLOG(2) << "Processed features for key " << utt; @@ -177,6 +189,4 @@ int main(int argc, char *argv[]) { std::cerr << e.what(); return -1; } - return 0; } - diff --git a/src/featbin/compute-mfcc-feats.cc b/src/featbin/compute-mfcc-feats.cc index 09efcd38dd0..0827d0a9360 100644 --- a/src/featbin/compute-mfcc-feats.cc +++ b/src/featbin/compute-mfcc-feats.cc @@ -19,33 +19,35 @@ // limitations under the License. #include "base/kaldi-common.h" -#include "util/common-utils.h" #include "feat/feature-mfcc.h" #include "feat/wave-reader.h" +#include "util/common-utils.h" int main(int argc, char *argv[]) { try { using namespace kaldi; const char *usage = "Create MFCC feature files.\n" - "Usage: compute-mfcc-feats [options...] \n"; + "Usage: compute-mfcc-feats [options...] " + "\n"; - // construct all the global objects + // Construct all the global objects. ParseOptions po(usage); MfccOptions mfcc_opts; + // Define defaults for global options. bool subtract_mean = false; BaseFloat vtln_warp = 1.0; std::string vtln_map_rspecifier; std::string utt2spk_rspecifier; int32 channel = -1; BaseFloat min_duration = 0.0; - // Define defaults for gobal options std::string output_format = "kaldi"; + std::string utt2dur_wspecifier; - // Register the MFCC option struct + // Register the MFCC option struct. mfcc_opts.Register(&po); - // Register the options + // Register the options. 
po.Register("output-format", &output_format, "Format of the output " "files [kaldi, htk]"); po.Register("subtract-mean", &subtract_mean, "Subtract mean of each " @@ -60,6 +62,8 @@ int main(int argc, char *argv[]) { "0 -> left, 1 -> right)"); po.Register("min-duration", &min_duration, "Minimum duration of segments " "to process (in seconds)."); + po.Register("write-utt2dur", &utt2dur_wspecifier, "Wspecifier to write " + "duration of each utterance in seconds, e.g. 'ark,t:utt2dur'."); po.Read(argc, argv); @@ -74,16 +78,16 @@ int main(int argc, char *argv[]) { Mfcc mfcc(mfcc_opts); + if (utt2spk_rspecifier != "" && vtln_map_rspecifier == "") + KALDI_ERR << ("The --utt2spk option is only needed if " + "the --vtln-map option is used."); + RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, + utt2spk_rspecifier); + SequentialTableReader reader(wav_rspecifier); BaseFloatMatrixWriter kaldi_writer; // typedef to TableWriter. TableWriter htk_writer; - if (utt2spk_rspecifier != "") - KALDI_ASSERT(vtln_map_rspecifier != "" && "the utt2spk option is only " - "needed if the vtln-map option is used."); - RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, - utt2spk_rspecifier); - if (output_format == "kaldi") { if (!kaldi_writer.Open(output_wspecifier)) KALDI_ERR << "Could not initialize output with wspecifier " @@ -96,6 +100,8 @@ int main(int argc, char *argv[]) { KALDI_ERR << "Invalid output_format string " << output_format; } + DoubleWriter utt2dur_writer(utt2dur_wspecifier); + int32 num_utts = 0, num_success = 0; for (; !reader.Done(); reader.Next()) { num_utts++; @@ -139,10 +145,10 @@ int main(int argc, char *argv[]) { SubVector waveform(wave_data.Data(), this_chan); Matrix features; try { - mfcc.ComputeFeatures(waveform, wave_data.SampFreq(), vtln_warp_local, &features); + mfcc.ComputeFeatures(waveform, wave_data.SampFreq(), + vtln_warp_local, &features); } catch (...) 
{ - KALDI_WARN << "Failed to compute features for utterance " - << utt; + KALDI_WARN << "Failed to compute features for utterance " << utt; continue; } if (subtract_mean) { @@ -168,6 +174,9 @@ int main(int argc, char *argv[]) { p.second = header; htk_writer.Write(utt, p); } + if (utt2dur_writer.IsOpen()) { + utt2dur_writer.Write(utt, wave_data.Duration()); + } if (num_utts % 10 == 0) KALDI_LOG << "Processed " << num_utts << " utterances"; KALDI_VLOG(2) << "Processed features for key " << utt; @@ -181,4 +190,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/src/featbin/compute-plp-feats.cc b/src/featbin/compute-plp-feats.cc index 3e9fe9d7423..5c3b9843b4d 100644 --- a/src/featbin/compute-plp-feats.cc +++ b/src/featbin/compute-plp-feats.cc @@ -19,9 +19,9 @@ // limitations under the License. #include "base/kaldi-common.h" -#include "util/common-utils.h" #include "feat/feature-plp.h" #include "feat/wave-reader.h" +#include "util/common-utils.h" int main(int argc, char *argv[]) { @@ -29,21 +29,23 @@ int main(int argc, char *argv[]) { using namespace kaldi; const char *usage = "Create PLP feature files.\n" - "Usage: compute-plp-feats [options...] \n"; + "Usage: compute-plp-feats [options...] " + "\n"; - // construct all the global objects + // Construct all the global objects. ParseOptions po(usage); PlpOptions plp_opts; + // Define defaults for global options. bool subtract_mean = false; BaseFloat vtln_warp = 1.0; std::string vtln_map_rspecifier; std::string utt2spk_rspecifier; int32 channel = -1; BaseFloat min_duration = 0.0; - // Define defaults for gobal options std::string output_format = "kaldi"; + std::string utt2dur_wspecifier; - // Register the options + // Register the options. 
po.Register("output-format", &output_format, "Format of the output " "files [kaldi, htk]"); po.Register("subtract-mean", &subtract_mean, "Subtract mean of each " @@ -58,11 +60,13 @@ int main(int argc, char *argv[]) { "0 -> left, 1 -> right)"); po.Register("min-duration", &min_duration, "Minimum duration of segments " "to process (in seconds)."); + po.Register("write-utt2dur", &utt2dur_wspecifier, "Wspecifier to write " + "duration of each utterance in seconds, e.g. 'ark,t:utt2dur'."); plp_opts.Register(&po); po.Read(argc, argv); - + if (po.NumArgs() != 2) { po.PrintUsage(); exit(1); @@ -74,16 +78,16 @@ int main(int argc, char *argv[]) { Plp plp(plp_opts); + if (utt2spk_rspecifier != "" && vtln_map_rspecifier == "") + KALDI_ERR << ("The --utt2spk option is only needed if " + "the --vtln-map option is used."); + RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, + utt2spk_rspecifier); + SequentialTableReader reader(wav_rspecifier); BaseFloatMatrixWriter kaldi_writer; // typedef to TableWriter. TableWriter htk_writer; - if (utt2spk_rspecifier != "") - KALDI_ASSERT(vtln_map_rspecifier != "" && "the utt2spk option is only " - "needed if the vtln-map option is used."); - RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, - utt2spk_rspecifier); - if (output_format == "kaldi") { if (!kaldi_writer.Open(output_wspecifier)) KALDI_ERR << "Could not initialize output with wspecifier " @@ -96,6 +100,8 @@ int main(int argc, char *argv[]) { KALDI_ERR << "Invalid output_format string " << output_format; } + DoubleWriter utt2dur_writer(utt2dur_wspecifier); + int32 num_utts = 0, num_success = 0; for (; !reader.Done(); reader.Next()) { num_utts++; @@ -107,8 +113,8 @@ int main(int argc, char *argv[]) { continue; } int32 num_chan = wave_data.Data().NumRows(), this_chan = channel; - { // This block works out the channel (0=left, 1=right...) 
- KALDI_ASSERT(num_chan > 0); // should have been caught in + { // This block works out the channel (0=left, 1=right...). + KALDI_ASSERT(num_chan > 0); // This should have been caught in // reading code if no channels. if (channel == -1) { this_chan = 0; @@ -139,10 +145,10 @@ int main(int argc, char *argv[]) { SubVector waveform(wave_data.Data(), this_chan); Matrix features; try { - plp.ComputeFeatures(waveform, wave_data.SampFreq(), vtln_warp_local, &features); + plp.ComputeFeatures(waveform, wave_data.SampFreq(), + vtln_warp_local, &features); } catch (...) { - KALDI_WARN << "Failed to compute features for utterance " - << utt; + KALDI_WARN << "Failed to compute features for utterance " << utt; continue; } if (subtract_mean) { @@ -168,6 +174,9 @@ int main(int argc, char *argv[]) { p.second = header; htk_writer.Write(utt, p); } + if (utt2dur_writer.IsOpen()) { + utt2dur_writer.Write(utt, wave_data.Duration()); + } if (num_utts % 10 == 0) KALDI_LOG << "Processed " << num_utts << " utterances"; KALDI_VLOG(2) << "Processed features for key " << utt; @@ -181,4 +190,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/src/featbin/compute-spectrogram-feats.cc b/src/featbin/compute-spectrogram-feats.cc index 3b40a6fa5c7..67932915278 100644 --- a/src/featbin/compute-spectrogram-feats.cc +++ b/src/featbin/compute-spectrogram-feats.cc @@ -18,9 +18,9 @@ // limitations under the License. #include "base/kaldi-common.h" -#include "util/common-utils.h" #include "feat/feature-spectrogram.h" #include "feat/wave-reader.h" +#include "util/common-utils.h" int main(int argc, char *argv[]) { @@ -28,29 +28,33 @@ int main(int argc, char *argv[]) { using namespace kaldi; const char *usage = "Create spectrogram feature files.\n" - "Usage: compute-spectrogram-feats [options...] \n"; + "Usage: compute-spectrogram-feats [options...] " + "\n"; - // construct all the global objects + // Construct all the global objects. 
ParseOptions po(usage); SpectrogramOptions spec_opts; + // Define defaults for global options. bool subtract_mean = false; int32 channel = -1; BaseFloat min_duration = 0.0; - // Define defaults for gobal options std::string output_format = "kaldi"; + std::string utt2dur_wspecifier; // Register the option struct spec_opts.Register(&po); // Register the options - po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]"); - po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]; not recommended to do it this way. "); - po.Register("channel", &channel, "Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)"); - po.Register("min-duration", &min_duration, "Minimum duration of segments to process (in seconds)."); - - // OPTION PARSING .......................................................... - // + po.Register("output-format", &output_format, + "Format of the output files [kaldi, htk]"); + po.Register("subtract-mean", &subtract_mean, "Subtract mean of each " + "feature file [CMS]; not recommended to do it this way. "); + po.Register("channel", &channel, "Channel to extract (-1 -> expect mono, " + "0 -> left, 1 -> right)"); + po.Register("min-duration", &min_duration, "Minimum duration of segments " + "to process (in seconds)."); + po.Register("write-utt2dur", &utt2dur_wspecifier, "Wspecifier to write " + "duration of each utterance in seconds, e.g. 'ark,t:utt2dur'."); - // parse options (+filling the registered variables) po.Read(argc, argv); if (po.NumArgs() != 2) { @@ -80,6 +84,8 @@ int main(int argc, char *argv[]) { KALDI_ERR << "Invalid output_format string " << output_format; } + DoubleWriter utt2dur_writer(utt2dur_wspecifier); + int32 num_utts = 0, num_success = 0; for (; !reader.Done(); reader.Next()) { num_utts++; @@ -114,8 +120,7 @@ int main(int argc, char *argv[]) { try { spec.ComputeFeatures(waveform, wave_data.SampFreq(), 1.0, &features); } catch (...) 
{ - KALDI_WARN << "Failed to compute features for utterance " - << utt; + KALDI_WARN << "Failed to compute features for utterance " << utt; continue; } if (subtract_mean) { @@ -141,6 +146,9 @@ int main(int argc, char *argv[]) { p.second = header; htk_writer.Write(utt, p); } + if (utt2dur_writer.IsOpen()) { + utt2dur_writer.Write(utt, wave_data.Duration()); + } if(num_utts % 10 == 0) KALDI_LOG << "Processed " << num_utts << " utterances"; KALDI_VLOG(2) << "Processed features for key " << utt; @@ -153,6 +161,4 @@ int main(int argc, char *argv[]) { std::cerr << e.what(); return -1; } - return 0; } -