diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh index 28aa15bb604..e1fca60a2dd 100755 --- a/egs/reverb/s5/local/get_results.sh +++ b/egs/reverb/s5/local/get_results.sh @@ -3,7 +3,6 @@ # "Our baselines" echo "########################################" echo "GMM RESULTs:" -dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit echo "exp/tri3/decode_dt_real_1ch" cat exp/tri3/decode_dt_real_1ch/scoring_kaldi/best_wer* echo "" @@ -16,6 +15,30 @@ echo "" echo "exp/tri3/decode_et_simu_1ch" cat exp/tri3/decode_et_simu_1ch/scoring_kaldi/best_wer* echo "" +echo "exp/tri3/decode_dt_real_2ch_wpe" +cat exp/tri3/decode_dt_real_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_2ch_wpe" +cat exp/tri3/decode_dt_simu_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_2ch_wpe" +cat exp/tri3/decode_et_real_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_simu_2ch_wpe" +cat exp/tri3/decode_et_simu_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_real_8ch_wpe" +cat exp/tri3/decode_dt_real_8ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_8ch_wpe" +cat exp/tri3/decode_dt_simu_8ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_8ch_wpe" +cat exp/tri3/decode_et_real_8ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_simu_8ch_wpe" +cat exp/tri3/decode_et_simu_8ch_wpe/scoring_kaldi/best_wer* +echo "" echo "exp/tri3/decode_dt_real_2ch_beamformit" cat exp/tri3/decode_dt_real_2ch_beamformit/scoring_kaldi/best_wer* echo "" diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh index 2880d4bb195..2da51b9786b 100755 --- a/egs/reverb/s5/local/prepare_real_data.sh +++ b/egs/reverb/s5/local/prepare_real_data.sh @@ -81,10 +81,10 @@ for nch in 1 2 8; do for task in dt et; do if [ ${task} == 'dt' ]; then audiodir=${reverb}/MC_WSJ_AV_Dev - audiodir_wpe=${wavdir}/WPE/MC_WSJ_AV_Dev + audiodir_wpe=${wavdir}/WPE/${nch}ch/MC_WSJ_AV_Dev elif [ ${task} == 'et' ]; then audiodir=${reverb}/MC_WSJ_AV_Eval - audiodir_wpe=${wavdir}/WPE/MC_WSJ_AV_Eval + audiodir_wpe=${wavdir}/WPE/${nch}ch/MC_WSJ_AV_Eval fi for x in `ls ${taskdir} | grep RealData | grep _${task}_`; do perl -se 'while(<>){m:^\S+/[\w\-]*_(T\w{6,7})\.wav$: || die "Bad line $_"; $id = lc $1; print "$id $dir$_";}' -- -dir=${audiodir} ${taskdir}/$x |\ @@ -117,19 +117,23 @@ for nch in 1 2 8; do for task in dt et; do datadir=data/${task}_real_${nch}ch mkdir -p ${datadir} - sort ${dir}/${task}_real_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp + sort ${dir}/${task}_real_${nch}ch_wav.scp > ${datadir}/wav.scp sort ${dir}/${task}_real_${nch}ch.txt > ${datadir}/text sort ${dir}/${task}_real_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_real_${nch}ch.spk2utt > ${datadir}/spk2utt ./utils/fix_data_dir.sh ${datadir} - done -done - -for nch in 2 8; do - for task in dt et; do - datadir=data/${task}_real_${nch}ch_beamformit + if [ ${nch} != 1 ]; then + datadir=data/${task}_real_${nch}ch_beamformit + mkdir -p ${datadir} + sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/-[1-8]_/-bf${nch}_/" | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_real_1ch.txt > ${datadir}/text + sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + fi + datadir=data/${task}_real_${nch}ch_wpe mkdir -p ${datadir} - sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/-[1-8]_/-bf${nch}_/" > ${datadir}/wav.scp + sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp sort ${dir}/${task}_real_1ch.txt > ${datadir}/text sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index b5c6001062c..cfdc226dd1e 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -50,7 +50,7 @@ for nch in 1 2 8; do sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wav.scp for task in dt et; do - for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e far -e near`; do perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${reverb}/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wav.scp @@ -58,19 +58,26 @@ for nch in 1 2 8; do task=tr for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do - perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/${nch}ch/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wpe_wav.scp for task in dt et; do - for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do - perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e far -e near`; do + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/${nch}ch/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wpe_wav.scp done # make a transcript - for task in tr dt et; do - for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + task=tr + for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + perl -e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' ${taskdir}/$x |\ + perl local/find_transcripts_singledot.pl ${dir}/${task}.dot |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_simu_${nch}ch.trans1 || exit 1; + cat ${dir}/${task}_simu_${nch}ch.trans1 | local/normalize_transcript.pl ${noiseword} > ${dir}/${task}_simu_${nch}ch.txt || exit 1; + for task in dt et; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e far -e near`; do perl -e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' ${taskdir}/$x |\ perl local/find_transcripts_singledot.pl ${dir}/${task}.dot |\ sed -e "s/^\(...\)/\1_${x}_\1/" @@ -90,26 +97,28 @@ for nch in 1 2 8; do for task in tr dt et; do datadir=data/${task}_simu_${nch}ch mkdir -p ${datadir} - if [ ${task} == 'tr' ]; then - sort ${dir}/${task}_simu_${nch}ch_wav.scp > ${datadir}/wav.scp - else - sort ${dir}/${task}_simu_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp - fi + sort ${dir}/${task}_simu_${nch}ch_wav.scp > ${datadir}/wav.scp sort ${dir}/${task}_simu_${nch}ch.txt > ${datadir}/text sort ${dir}/${task}_simu_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_simu_${nch}ch.spk2utt > ${datadir}/spk2utt ./utils/fix_data_dir.sh ${datadir} - done -done - -for nch in 2 8; do - for task in dt et; do - datadir=data/${task}_simu_${nch}ch_beamformit - mkdir -p ${datadir} - sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" > ${datadir}/wav.scp - sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text - sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk - sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt - ./utils/fix_data_dir.sh ${datadir} + if [ ${task} != 'tr' ]; then + datadir=data/${task}_simu_${nch}ch_wpe + mkdir -p ${datadir} + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text + sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + if [ ${nch} != 1 ]; then + datadir=data/${task}_simu_${nch}ch_beamformit + mkdir -p ${datadir} + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text + sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + fi + fi done done diff --git a/egs/reverb/s5/local/run_beamform.sh b/egs/reverb/s5/local/run_beamform.sh index 0549b2e34f2..1c8aade7287 100755 --- a/egs/reverb/s5/local/run_beamform.sh +++ b/egs/reverb/s5/local/run_beamform.sh @@ -7,7 +7,7 @@ . ./path.sh # Config: -nj=20 +nj=50 cmd=run.pl . utils/parse_options.sh || exit 1; diff --git a/egs/reverb/s5/local/run_wpe.sh b/egs/reverb/s5/local/run_wpe.sh index 77ff6fffb31..d1ea56c6c55 100755 --- a/egs/reverb/s5/local/run_wpe.sh +++ b/egs/reverb/s5/local/run_wpe.sh @@ -6,7 +6,7 @@ . ./path.sh # Config: -nj=20 +nj=50 cmd=run.pl . utils/parse_options.sh || exit 1; @@ -104,7 +104,7 @@ for task in dt et; do done done -for task in tr dt et; do +for task in dt et; do for nch in 1 2 8; do wdir=exp/wpe_simu_${task}_${nch}ch mkdir -p $wdir/log diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 39c56207da7..3cc3efca9e7 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -57,7 +57,7 @@ fi #training set and test set train_set=tr_simu_8ch -test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit" +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe dt_real_2ch_wpe dt_simu_2ch_wpe et_real_2ch_wpe et_simu_2ch_wpe dt_real_8ch_wpe dt_simu_8ch_wpe et_real_8ch_wpe et_simu_8ch_wpe" # The language models with which to decode (tg_5k or bg_5k) lm="tg_5k"