From 43a3921757108b33470d114ae32359d098aefa85 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Fri, 31 Aug 2018 11:50:52 -0400 Subject: [PATCH 01/39] add and delete files --- egs/chime5/s5/path.sh | 1 + egs/reverb/s5/conf/fbank.conf | 4 +- .../s5/local/Generate_mcTrainData_cut.m | 23 ++- egs/reverb/s5/local/REVERB_create_mcdata.sh | 74 -------- .../s5/local/REVERB_mcwsjav_data_prep.sh | 165 ------------------ .../s5/local/REVERB_wsjcam0_data_prep.sh | 117 ------------- egs/reverb/s5/local/check_tools.sh | 53 ++++++ egs/reverb/s5/local/generate_data.sh | 84 +++++++++ egs/reverb/s5/local/prepare_real_data.sh | 119 +++++++++++++ egs/reverb/s5/local/prepare_simu_data.sh | 87 +++++++++ egs/reverb/s5/local/score_mbr.sh | 1 - egs/reverb/s5/path.sh | 2 + egs/reverb/s5/run.sh | 115 ++++++------ 13 files changed, 415 insertions(+), 430 deletions(-) delete mode 100755 egs/reverb/s5/local/REVERB_create_mcdata.sh delete mode 100755 egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh delete mode 100755 egs/reverb/s5/local/REVERB_wsjcam0_data_prep.sh create mode 100755 egs/reverb/s5/local/check_tools.sh create mode 100755 egs/reverb/s5/local/generate_data.sh create mode 100755 egs/reverb/s5/local/prepare_real_data.sh create mode 100755 egs/reverb/s5/local/prepare_simu_data.sh delete mode 120000 egs/reverb/s5/local/score_mbr.sh diff --git a/egs/chime5/s5/path.sh b/egs/chime5/s5/path.sh index fb1c0489386..6fc01352d90 100644 --- a/egs/chime5/s5/path.sh +++ b/egs/chime5/s5/path.sh @@ -1,5 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +[ -f $KALDI_ROOT/tools/extras/env.sh ] && . $KALDI_ROOT/tools/extras/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh diff --git a/egs/reverb/s5/conf/fbank.conf b/egs/reverb/s5/conf/fbank.conf index c4b73674cab..82ac7bd0dbc 100644 --- a/egs/reverb/s5/conf/fbank.conf +++ b/egs/reverb/s5/conf/fbank.conf @@ -1,2 +1,2 @@ -# No non-default options for now. - +--sample-frequency=16000 +--num-mel-bins=80 diff --git a/egs/reverb/s5/local/Generate_mcTrainData_cut.m b/egs/reverb/s5/local/Generate_mcTrainData_cut.m index cc01ff89b7d..e6d7d95550a 100755 --- a/egs/reverb/s5/local/Generate_mcTrainData_cut.m +++ b/egs/reverb/s5/local/Generate_mcTrainData_cut.m @@ -1,13 +1,13 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir) % % Input variables: -% WSJ_dir_name: string name of user's clean wsjcam0 corpus directory -% (*Directory structure for wsjcam0 corpushas to be kept as it is after obtaining it from LDC. +% WSJ_dir_name: string name of WAV file directory converted from original wsjcam0 SPHERE files +% (*Directory structure for wsjcam0 corpus has to be kept as it is after obtaining it from LDC. % Otherwise this script does not work.) % % This function generates multi-condition traiing data % based on the following items: -% 1. wsjcam0 corpus (distributed from the LDC) +% 1. wsjcam0 corpus (WAV files) % 2. room impulse responses (ones under ./RIR/) % 3. noise (ones under ./NOISE/). % Generated data has the same directory structure as original wsjcam0 corpus. 
@@ -26,8 +26,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir) display(['Name of directory for original wsjcam0: ',WSJ_dir_name]) display(['Name of directory to save generated multi-condition training data: ',save_dir]) -unix(['chmod u+x sphere_to_wave.csh']); -unix(['chmod u+x bin/*']); % Parameters related to acoustic conditions SNRdB=20; @@ -114,13 +112,12 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir) end prev_fname=fname(1:idx1(end)); - % load (sphere format) speech signal - x=read_sphere([WSJ_dir_name,'/data/', fname]); - x=x/(2^15); % conversion from short-int to float + % load speech signal + x=audioread([WSJ_dir_name, '/data/', fname, '.wav'])'; % load RIR and noise for "THIS" utterance - eval(['RIR=wavread(RIR_sim',num2str(rcount),');']); - eval(['NOISE=wavread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']); + eval(['RIR=audioread(RIR_sim',num2str(rcount),');']); + eval(['NOISE=audioread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']); % Generate 8ch noisy reverberant data y=gen_obs(x,RIR,NOISE,SNRdB); @@ -138,8 +135,10 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir) y=y/4; % common normalization to all the data to prevent clipping % denominator was decided experimentally - for ch=1:8 - eval(['wavwrite(y(:,',num2str(ch),'),16000,''',save_dir_tr fname,'_ch',num2str(ch),'.wav'');']); + for ch=1:8 + outfilename = [save_dir_tr, fname, '_ch', num2str(ch), '.wav']; + %eval(['audiowrite(y(:,',num2str(ch),'),16000,''',save_dir_tr fname,'_ch',num2str(ch),'.wav'');']); + eval(['audiowrite(outfilename, y(:,',num2str(ch),'), 16000);']); end display(['sentence ',num2str(fcount),' (out of 7861) finished! 
(Multi-condition training data)']) diff --git a/egs/reverb/s5/local/REVERB_create_mcdata.sh b/egs/reverb/s5/local/REVERB_create_mcdata.sh deleted file mode 100755 index 4cc776aa159..00000000000 --- a/egs/reverb/s5/local/REVERB_create_mcdata.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -# Copyright 2013 MERL (author: Shinji Watanabe) -# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -if [ $# -ne 2 ]; then - printf "\nUSAGE: %s \n\n" `basename $0` - echo "e.g.,:" - echo " `basename $0` /archive/speech-db/processed/public/REVERB/wsjcam0 data_mc_tr" - exit 1; -fi - -wsjcam0_dir=$1 -reverb_tr_dir=$2 - -dir=`pwd`/data/local/reverb_tools -mkdir -p $dir $reverb_tr_dir -lmdir=`pwd`/data/local/nist_lm - -# Download tools -URL1="http://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_mcTrainData.tgz" -URL2="http://reverb2014.dereverberation.com/tools/REVERB_TOOLS_FOR_ASR_ver2.0.tgz" -for f in $URL1 $URL2; do - x=`basename $f` - if [ ! -e $dir/$x ]; then - wget $f -O $dir/$x || exit 1; - tar zxvf $dir/$x -C $dir || exit 1; - fi -done -URL3="http://reverb2014.dereverberation.com/tools/taskFiles_et.tgz" -x=`basename $URL3` -if [ ! 
-e $dir/$x ]; then - wget $URL3 -O $dir/$x || exit 1; - tar zxvf $dir/$x -C $dir || exit 1; - cp -fr $dir/`basename $x .tgz`/* $dir/ReleasePackage/reverb_tools_for_asr_ver2.0/taskFiles/ -fi - -# Download and install nist tools -pushd $dir/ReleasePackage/reverb_tools_for_asr_ver2.0 -perl -ape "s|^main$|targetSPHEREDir\=tools/SPHERE\ninstall_nist|;" installTools > installnist -chmod u+x installnist -./installnist -popd - -# Make mcTrainData -cp local/Generate_mcTrainData_cut.m $dir/reverb_tools_for_Generate_mcTrainData/ -pushd $dir/reverb_tools_for_Generate_mcTrainData/ -# copied nist tools required for the following matlab command -cp $dir/ReleasePackage/reverb_tools_for_asr_ver2.0/tools/SPHERE/nist/bin/{h_strip,w_decode} ./bin/ - -tmpdir=`mktemp -d tempXXXXX ` -tmpmfile=$tmpdir/run_mat.m -cat < $tmpmfile -addpath(genpath('.')) -Generate_mcTrainData_cut('$wsjcam0_dir', '$reverb_tr_dir'); -EOF -cat $tmpmfile | matlab -nodisplay -rm -rf $tmpdir -popd - -echo "Successfully generated multi-condition training data and stored it in $reverb_tr_dir." && exit 0; diff --git a/egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh b/egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh deleted file mode 100755 index a4599f97702..00000000000 --- a/egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/bash - -# Copyright 2013 MERL (author: Felix Weninger) -# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. 
-# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# for REVERB challenge: - -dir=`pwd`/data/local/data -lmdir=`pwd`/data/local/nist_lm -mkdir -p $dir $lmdir -local=`pwd`/local -utils=`pwd`/utils -root=`pwd` - -. ./path.sh # Needed for KALDI_ROOT -export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -if [ ! -x $sph2pipe ]; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi - -cd $dir - -MIC=primary - -# input corpus (original or processed, tr or dt, etc.) -RWSJ=$1 -if [ ! -d "$RWSJ" ]; then - echo Could not find directory $RWSJ! Check pathnames in corpus.sh! - exit 1 -fi - -mcwsjav_mlf=$RWSJ/mlf/WSJ.mlf -if [ ! -z "$4" ]; then - mcwsjav_mlf=$4 -fi - -# the name of the dataset to be created -dataset=REVERB_Real_dt - -# the WSJCAM0 set that the set is based on (tr, dt, ...) -# this will be used to find the correct transcriptions etc. -dt_or_x=dt - -if [ ! -z "$2" ]; then - dataset=$2 -fi -# dt or et -if [ ! -z "$3" ]; then - dt_or_x=$3 -fi - -# unfortunately, we need a pointer to HTK baseline -# since the corpus does NOT contain the data set descriptions -# for the REVERB Challenge - -taskFileDir=$dir/../reverb_tools/ReleasePackage/reverb_tools_for_asr_ver2.0/taskFiles/1ch -#taskFiles=`ls $taskFileDir/*Data_dt_for_*` -taskFiles=`ls $taskFileDir/RealData_${dt_or_x}_for_1ch_{far,near}*` - -dir2=$dir/$dataset -mkdir -p $dir2 - -for taskFile in $taskFiles; do - -set=`basename $taskFile` - - -echo $mcwsjav_mlf - -# MLF transcription correction -# taken from HTK baseline script -sed -e ' -# dos to unix line feed conversion -s/\x0D$//' \ --e " - s/\x60//g # remove unicode character grave accent. - " \ --e " - # fix the single quote for the word yield - # and the quoted ROOTS - # e.g. 
yield' --> yield - # reason: YIELD' is not in dict, while YIELD is - s/YIELD'/YIELD/g - s/'ROOTS'/ROOTS/g - s/'WHERE/WHERE/g - s/PEOPLE'/PEOPLE/g - s/SIT'/SIT/g - s/'DOMINEE/DOMINEE/g - s/CHURCH'/CHURCH/g" \ --e ' - # fix the single missing double full stop issue at the end of an utterance - # e.g. I. C. N should be I. C. N. - # reason: N is not in dict, while N. is - /^[A-Z]$/ { - # append a line - N - # search for single dot on the second line - /\n\./ { - # found it - now replace the - s/\([A-Z]\)\n\./\1\.\n\./ - } - }' \ -$mcwsjav_mlf |\ -perl $local/mlf2text.pl > $dir2/$set.txt1 - -#exit - -#taskFile=$taskFileDir/$set -# contains pointer to wav files with relative path --> add absolute path -echo taskFile = $taskFile -awk '{print "'$RWSJ'"$1}' < $taskFile > $dir2/${set}.flist || exit 1; - -# this is like flist2scp.pl but it can take wav file list as input -(perl -e 'while(<>){ - m:^\S+/[\w\-]*_(T\w{6,7})\.wav$: || die "Bad line $_"; - $id = lc $1; - print "$id $_"; -}' < $dir2/$set.flist || exit 1) | sort > $dir2/${set}_wav.scp - - -# Make the utt2spk and spk2utt files. 
-cat $dir2/${set}_wav.scp | awk '{print $1, $1}' > $dir2/$set.utt2spk || exit 1; -cat $dir2/$set.utt2spk | $utils/utt2spk_to_spk2utt.pl > $dir2/$set.spk2utt || exit 1; - -awk '{print $1}' < $dir2/$set.utt2spk |\ -$local/find_transcripts_txt.pl $dir2/$set.txt1 | sort | uniq > $dir2/$set.txt -#rm $dir2/$set.txt1 - -# Create directory structure required by decoding scripts - -cd $root -mkdir -p data/$dataset/$set -cp $dir2/${set}_wav.scp data/$dataset/$set/wav.scp || exit 1; -cp $dir2/$set.txt data/$dataset/$set/text || exit 1; -cp $dir2/$set.spk2utt data/$dataset/$set/spk2utt || exit 1; -cp $dir2/$set.utt2spk data/$dataset/$set/utt2spk || exit 1; - -echo "Data preparation for $set succeeded" -#echo "Put files into $dir2/$set.*" - - -mfccdir=mfcc/$dataset -#for x in test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean; do -#for x in si_tr; do -steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 \ - data/$dataset/$set exp/make_mfcc/$dataset/$set $mfccdir || exit 1; -steps/compute_cmvn_stats.sh data/$dataset/$set exp/make_mfcc/$dataset/$set $mfccdir || exit 1; - -done diff --git a/egs/reverb/s5/local/REVERB_wsjcam0_data_prep.sh b/egs/reverb/s5/local/REVERB_wsjcam0_data_prep.sh deleted file mode 100755 index 6ab2f2f4b73..00000000000 --- a/egs/reverb/s5/local/REVERB_wsjcam0_data_prep.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash - -# Copyright 2013 MERL (author: Felix Weninger) -# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -dir=$PWD/data/local/data -lmdir=$PWD/data/local/nist_lm -mkdir -p $dir $lmdir -local=$PWD/local -utils=$PWD/utils -root=$PWD - -. ./path.sh # Needed for KALDI_ROOT -export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -if [ ! -x $sph2pipe ]; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi - -RWSJ=$1 # input corpus (original or processed, tr or dt, etc.) -dataset=REVERB_dt # the name of the dataset to be created -if [ ! -z "$2" ]; then - dataset=$2 -fi -dt_or_x=dt # the WSJCAM0 set that the set is based on (tr, dt, ...) -# this will be used to find the correct transcriptions etc. -if [ ! -z "$3" ]; then - dt_or_x=$3 -fi - -if [ ! -d "$RWSJ" ]; then - echo Could not find directory $RWSJ! Check pathnames in corpus.sh! 
- exit 1 -fi - -cd $dir -MIC=primary - -# unfortunately, we need a pointer to HTK baseline -# since the corpus does NOT contain the data set descriptions -# for the REVERB Challenge -taskFileDir=$dir/../reverb_tools/ReleasePackage/reverb_tools_for_asr_ver2.0/taskFiles/1ch -#taskFiles=`ls $taskFileDir/*Data_dt_for_*` -nch=1 -if [ "$dt_or_x" = "tr" ]; then - taskFiles=`ls $taskFileDir/SimData_tr_for_${nch}ch*` || exit 1 -else - taskFiles=`ls $taskFileDir/SimData_${dt_or_x}_for_${nch}ch_{far,near}*` || exit 1 -fi -for taskFile in $taskFiles; do - -set=`basename $taskFile` - -#taskFile=$taskFileDir/$set -dir2=$dir/$dataset -mkdir -p $dir2 -# contains pointer to wav files with relative path --> add absolute path -echo taskFile = $taskFile -awk '{print "'$RWSJ/data'"$1}' < $taskFile > $dir2/${set}.flist || exit 1; - -# this is like flist2scp.pl but it can take wav file list as input -perl -e 'while(<>){ - m:^\S+/(\w{8})\w*\.wav$: || die "Bad line $_"; - $id = lc $1; - print "$id $_"; -}' < $dir2/$set.flist | sort > $dir2/${set}_wav.scp || exit 1; - -# find transcriptions of given utterances in si_dt.dot -# create a trans1 file for each set, convert to txt (kaldi "MLF") -dot=$dir/si_${dt_or_x}.dot -perl -e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' $taskFile |\ -perl $local/find_transcripts_singledot.pl $dot \ -> $dir2/$set.trans1 || exit 1; - -noiseword=""; -cat $dir2/$set.trans1 | $local/normalize_transcript.pl $noiseword | sort | uniq > $dir2/$set.txt || exit 1; -#exit - - -# Make the utt2spk and spk2utt files. 
-cat $dir2/${set}_wav.scp | awk '{print $1, $1}' > $dir2/$set.utt2spk || exit 1; -cat $dir2/$set.utt2spk | $utils/utt2spk_to_spk2utt.pl > $dir2/$set.spk2utt || exit 1; - -# Create directory structure required by decoding scripts -cd $root -mkdir -p data/$dataset/$set -cp $dir2/${set}_wav.scp data/$dataset/$set/wav.scp || exit 1; -cp $dir2/$set.txt data/$dataset/$set/text || exit 1; -cp $dir2/$set.spk2utt data/$dataset/$set/spk2utt || exit 1; -cp $dir2/$set.utt2spk data/$dataset/$set/utt2spk || exit 1; - -echo "Data preparation for $set succeeded" -#echo "Put files into $dir2/$set.*" - - -mfccdir=mfcc/$dataset -#for x in test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean; do -#for x in si_tr; do -steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 \ - data/$dataset/$set exp/make_mfcc/$dataset/$set $mfccdir || exit 1; -steps/compute_cmvn_stats.sh data/$dataset/$set exp/make_mfcc/$dataset/$set $mfccdir || exit 1; - -done diff --git a/egs/reverb/s5/local/check_tools.sh b/egs/reverb/s5/local/check_tools.sh new file mode 100755 index 00000000000..698d8e411dd --- /dev/null +++ b/egs/reverb/s5/local/check_tools.sh @@ -0,0 +1,53 @@ +#!/bin/bash -u + +# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh + +command -v uconv &>/dev/null \ + || { echo >&2 "uconv not found on PATH. 
You will have to install ICU4C"; exit 1; } + +command -v ngram &>/dev/null \ + || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh to install it"; exit 1; } + +if [ -z ${LIBLBFGS} ]; then + echo >&2 "SRILM is not compiled with the support of MaxEnt models." + echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" + echo >&2 "which will take care of compiling the SRILM with MaxEnt support" + exit 1; +fi + +sox=`command -v sox 2>/dev/null` \ + || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; exit 1; } + +# If sox is found on path, check if the version is correct +if [ ! -z "$sox" ]; then + sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'` + if [[ ! $sox_version =~ v14.4.* ]]; then + echo "Unsupported sox version $sox_version found on path. You will need version v14.4.0 and higher." + exit 1 + fi +fi + +command -v phonetisaurus-align &>/dev/null \ + || { echo >&2 "Phonetisaurus not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_phonetisaurus.sh to install it"; exit 1; } + +command -v ffmpeg &>/dev/null \ + || { echo >&2 "FFMPEG not found on PATH. You will have to install FFMPEG"; exit 1; } + +exit 0 + + diff --git a/egs/reverb/s5/local/generate_data.sh b/egs/reverb/s5/local/generate_data.sh new file mode 100755 index 00000000000..3228f0e1b3c --- /dev/null +++ b/egs/reverb/s5/local/generate_data.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe) +# Apache 2.0 +# This script is adapted from data preparation scripts in the Kaldi reverb recipe +# https://github.com/kaldi-asr/kaldi/tree/master/egs/reverb/s5/local + +# Begin configuration section. +wavdir=${PWD}/wav +# End configuration section + +. ./utils/parse_options.sh # accept options.. you can run this run.sh with the + +. 
./path.sh + +echo >&2 "$0" "$@" +if [ $# -ne 1 ] ; then + echo >&2 "$0" "$@" + echo >&2 "$0: Error: wrong number of arguments" + echo -e >&2 "Usage:\n $0 [opts] " + echo -e >&2 "eg:\n $0 /export/corpora3/LDC/LDC95S24/wsjcam0" + exit 1 +fi + +set -e -o pipefail + +wsjcam0=$1 +mkdir -p ${wavdir} + +# tool directory +dir=${PWD}/data/local/reverb_tools +mkdir -p ${dir} + +# Download tools +URL1="http://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_mcTrainData.tgz" +URL2="http://reverb2014.dereverberation.com/tools/REVERB_TOOLS_FOR_ASR_ver2.0.tgz" +for f in $URL1 $URL2; do + x=`basename $f` + if [ ! -e $dir/$x ]; then + wget $f -O $dir/$x || exit 1; + tar zxvf $dir/$x -C $dir || exit 1; + fi +done +URL3="http://reverb2014.dereverberation.com/tools/taskFiles_et.tgz" +x=`basename $URL3` +if [ ! -e $dir/$x ]; then + wget $URL3 -O $dir/$x || exit 1; + tar zxvf $dir/$x -C $dir || exit 1; + cp -fr $dir/`basename $x .tgz`/* $dir/ReleasePackage/reverb_tools_for_asr_ver2.0/taskFiles/ +fi + +# generate WAV files for matlab +echo "generating WAV files" +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at ${sph2pipe}"; + exit 1; +fi +for sph in `cat ${dir}/reverb_tools_for_Generate_mcTrainData/etc/audio_si_tr.lst`; do + d=`dirname ${wavdir}/WSJCAM0/data/${sph}` + if [ ! 
-d "${d}" ]; then + mkdir -p ${d} + fi + ${sph2pipe} -f wav ${wsjcam0}/data/${sph}.wv1 > ${wavdir}/WSJCAM0/data/${sph}.wav +done +nwav=`find ${wavdir}/WSJCAM0/data/primary_microphone/si_tr | grep .wav | wc -l` +echo "generated ${nwav} WAV files (it must be 7861)" +[ "$nwav" -eq 7861 ] || echo "Warning: expected 7861 WAV files, got $nwav" + +# generate training data +reverb_tr_dir=${wavdir}/REVERB_WSJCAM0_tr +cp local/Generate_mcTrainData_cut.m $dir/reverb_tools_for_Generate_mcTrainData/ +pushd $dir/reverb_tools_for_Generate_mcTrainData/ +tmpdir=`mktemp -d tempXXXXX ` +tmpmfile=$tmpdir/run_mat.m +cat <<EOF > $tmpmfile +addpath(genpath('.')) +Generate_mcTrainData_cut('$wavdir/WSJCAM0', '$reverb_tr_dir'); +EOF +cat $tmpmfile | matlab -nodisplay +rm -rf $tmpdir +popd + +echo "Successfully generated multi-condition training data and stored it in $reverb_tr_dir." && exit 0; diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh new file mode 100755 index 00000000000..d7d16ba07bf --- /dev/null +++ b/egs/reverb/s5/local/prepare_real_data.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# +# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe) +# Apache 2.0 +# This script is adapted from data preparation scripts in the Kaldi reverb recipe +# https://github.com/kaldi-asr/kaldi/tree/master/egs/reverb/s5/local + +# Begin configuration section. +# End configuration section +. ./utils/parse_options.sh # accept options.. you can run this run.sh with the + +. 
./path.sh + +echo >&2 "$0" "$@" +if [ $# -ne 1 ] ; then + echo >&2 "$0" "$@" + echo >&2 "$0: Error: wrong number of arguments" + echo -e >&2 "Usage:\n $0 [opts] " + echo -e >&2 "eg:\n $0 /export/corpora5/REVERB_2014/REVERB" + exit 1 +fi + +set -e -o pipefail + +reverb=$1 + +# working directory +dir=${PWD}/data/local/data +mkdir -p ${dir} + +for task in dt et; do + if [ ${task} == 'dt' ]; then + mlf=${reverb}/MC_WSJ_AV_Dev/mlf/WSJ.mlf + elif [ ${task} == 'et' ]; then + mlf=${reverb}/MC_WSJ_AV_Eval/mlf/WSJ.mlf + fi + # MLF transcription correction + # taken from HTK baseline script + sed -e ' +# dos to unix line feed conversion +s/\x0D$//' \ + -e " + s/\x60//g # remove unicode character grave accent. + " \ + -e " + # fix the single quote for the word yield + # and the quoted ROOTS + # e.g. yield' --> yield + # reason: YIELD' is not in dict, while YIELD is + s/YIELD'/YIELD/g + s/'ROOTS'/ROOTS/g + s/'WHERE/WHERE/g + s/PEOPLE'/PEOPLE/g + s/SIT'/SIT/g + s/'DOMINEE/DOMINEE/g + s/CHURCH'/CHURCH/g" \ + -e ' + # fix the single missing double full stop issue at the end of an utterance + # e.g. I. C. N should be I. C. N. + # reason: N is not in dict, while N. 
is + /^[A-Z]$/ { + # append a line + N + # search for single dot on the second line + /\n\./ { + # found it - now replace the + s/\([A-Z]\)\n\./\1\.\n\./ + } + }' \ + $mlf |\ + perl local/mlf2text.pl > ${dir}/${task}.txt +done + + +noiseword=""; +for nch in 1 2 8; do + taskdir=data/local/reverb_tools/ReleasePackage/reverb_tools_for_asr_ver2.0/taskFiles/${nch}ch + # make a wav list + for task in dt et; do + if [ ${task} == 'dt' ]; then + audiodir=${reverb}/MC_WSJ_AV_Dev + elif [ ${task} == 'et' ]; then + audiodir=${reverb}/MC_WSJ_AV_Eval + fi + for x in `ls ${taskdir} | grep RealData | grep _${task}_`; do + perl -se 'while(<>){m:^\S+/[\w\-]*_(T\w{6,7})\.wav$: || die "Bad line $_"; $id = lc $1; print "$id $dir$_";}' -- -dir=${audiodir} ${taskdir}/$x |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_real_${nch}ch_wav.scp + done + # make a transcript + for task in dt et; do + for x in `ls ${taskdir} | grep RealData | grep _${task}_`; do + perl -se 'while(<>){m:^\S+/[\w\-]*_(T\w{6,7})\.wav$: || die "Bad line $_"; $id = lc $1; print "$id\n";}' ${taskdir}/$x |\ + perl local/find_transcripts_txt.pl ${dir}/${task}.txt |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_real_${nch}ch.trans1 || exit 1; + cat ${dir}/${task}_real_${nch}ch.trans1 | local/normalize_transcript.pl ${noiseword} > ${dir}/${task}_real_${nch}ch.txt || exit 1; + done + + # Make the utt2spk and spk2utt files. 
+ for task in dt et; do + cat ${dir}/${task}_real_${nch}ch_wav.scp | awk '{print $1}' | awk -F '_' '{print $0 " " $1}' > ${dir}/${task}_real_${nch}ch.utt2spk || exit 1; + cat ${dir}/${task}_real_${nch}ch.utt2spk | ./utils/utt2spk_to_spk2utt.pl > ${dir}/${task}_real_${nch}ch.spk2utt || exit 1; + done +done + +# finally copy the above files to the data directory +for nch in 1 2 8; do + for task in dt et; do + datadir=data/${task}_real_${nch}ch + mkdir -p ${datadir} + sort ${dir}/${task}_real_${nch}ch_wav.scp > ${datadir}/wav.scp + sort ${dir}/${task}_real_${nch}ch.txt > ${datadir}/text + sort ${dir}/${task}_real_${nch}ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_real_${nch}ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + done +done + diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh new file mode 100755 index 00000000000..08eec3a038c --- /dev/null +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# +# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe) +# Apache 2.0 +# This script is adapted from data preparation scripts in the Kaldi reverb recipe +# https://github.com/kaldi-asr/kaldi/tree/master/egs/reverb/s5/local + +# Begin configuration section. +wavdir=${PWD}/wav +# End configuration section +. ./utils/parse_options.sh # accept options.. you can run this run.sh with the + +. 
./path.sh + +echo >&2 "$0" "$@" +if [ $# -ne 2 ] ; then + echo >&2 "$0" "$@" + echo >&2 "$0: Error: wrong number of arguments" + echo -e >&2 "Usage:\n $0 [opts] " + echo -e >&2 "eg:\n $0 /export/corpora5/REVERB_2014/REVERB /export/corpora3/LDC/LDC95S24/wsjcam0" + exit 1 +fi + +set -e -o pipefail + +reverb=$1 +wsjcam0=$2 + +# tool directory +tooldir=${PWD}/data/local/reverb_tools + +# working directory +dir=${PWD}/data/local/data +mkdir -p ${dir} + +# make a one dot file for train, dev, and eval data +# the directory structure of WSJCAM0 is not consistent and we need such process for each task +cp ${wsjcam0}/data/primary_microphone/etc/si_tr.dot ${dir}/tr.dot +cat ${wsjcam0}/data/primary_microphone/etc/si_dt*.dot | sort > ${dir}/dt.dot +cat ${wsjcam0}/data/*/si_et*/*/*.dot | sort > ${dir}/et.dot + +noiseword=""; +for nch in 1 2 8; do + taskdir=data/local/reverb_tools/ReleasePackage/reverb_tools_for_asr_ver2.0/taskFiles/${nch}ch + # make a wav list + task=tr + for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_simu_${nch}ch_wav.scp + for task in dt et; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${reverb}/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_simu_${nch}ch_wav.scp + done + + # make a transcript + for task in tr dt et; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + perl -e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' ${taskdir}/$x |\ + perl local/find_transcripts_singledot.pl ${dir}/${task}.dot |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_simu_${nch}ch.trans1 || exit 1; + cat 
${dir}/${task}_simu_${nch}ch.trans1 | local/normalize_transcript.pl ${noiseword} > ${dir}/${task}_simu_${nch}ch.txt || exit 1; + done + + # Make the utt2spk and spk2utt files. + for task in tr dt et; do + cat ${dir}/${task}_simu_${nch}ch_wav.scp | awk '{print $1}' | awk -F '_' '{print $0 " " $1}' > ${dir}/${task}_simu_${nch}ch.utt2spk || exit 1; + cat ${dir}/${task}_simu_${nch}ch.utt2spk | ./utils/utt2spk_to_spk2utt.pl > ${dir}/${task}_simu_${nch}ch.spk2utt || exit 1; + done +done + +# finally copy the above files to the data directory +for nch in 1 2 8; do + for task in tr dt et; do + datadir=data/${task}_simu_${nch}ch + mkdir -p ${datadir} + sort ${dir}/${task}_simu_${nch}ch_wav.scp > ${datadir}/wav.scp + sort ${dir}/${task}_simu_${nch}ch.txt > ${datadir}/text + sort ${dir}/${task}_simu_${nch}ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_simu_${nch}ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + done +done + diff --git a/egs/reverb/s5/local/score_mbr.sh b/egs/reverb/s5/local/score_mbr.sh deleted file mode 120000 index 2573fadf042..00000000000 --- a/egs/reverb/s5/local/score_mbr.sh +++ /dev/null @@ -1 +0,0 @@ -../../../wsj/s5/local/score_mbr.sh \ No newline at end of file diff --git a/egs/reverb/s5/path.sh b/egs/reverb/s5/path.sh index 1a6fb5f891b..f46c5d8cb72 100644 --- a/egs/reverb/s5/path.sh +++ b/egs/reverb/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +[ -f $KALDI_ROOT/tools/extras/env.sh ] && . $KALDI_ROOT/tools/extras/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index cb0b00c19b6..36225902cb7 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -33,7 +33,7 @@ fi . 
./cmd.sh . ./path.sh -stage=1 +stage=0 . utils/parse_options.sh # Set bash to 'debug' mode, it prints the commands (option '-x') and exits on : # -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline', @@ -41,28 +41,20 @@ set -euxo pipefail # please make sure to set the paths of the REVERB and WSJ0 data if [[ $(hostname -f) == *.clsp.jhu.edu ]] ; then - REVERB_home=/export/corpora5/REVERB_2014/REVERB + reverb=/export/corpora5/REVERB_2014/REVERB export wsjcam0=/export/corpora3/LDC/LDC95S24/wsjcam0 # set LDC WSJ0 directory to obtain LMs # REVERB data directory only provides bi-gram (bcb05cnp), but this recipe also uses 3-gram (tcb05cnp.z) export wsj0=/export/corpora5/LDC/LDC93S6A/11-13.1 #LDC93S6A or LDC93S6B # It is assumed that there will be a 'wsj0' subdirectory # within the top-level corpus directory -elif [[ $(hostname -f) == *.merl.com ]] ; then - REVERB_home=/db/laputa1/data/original/public/REVERB - export wsjcam0=$REVERB_home/wsjcam0 - # set LDC WSJ0 directory to obtain LMs - # REVERB data directory only provides bi-gram (bcb05cnp), but this recipe also uses 3-gram (tcb05cnp.z) - export wsj0=/db/laputa1/data/original/public/WSJ0/11-13.1 #LDC93S6A or LDC93S6B - # It is assumed that there will be a 'wsj0' subdirectory - # within the top-level corpus directory else echo "Set the data directory locations." 
&& exit 1; fi -export reverb_dt=$REVERB_home/REVERB_WSJCAM0_dt -export reverb_et=$REVERB_home/REVERB_WSJCAM0_et -export reverb_real_dt=$REVERB_home/MC_WSJ_AV_Dev -export reverb_real_et=$REVERB_home/MC_WSJ_AV_Eval +export reverb_dt=$reverb/REVERB_WSJCAM0_dt +export reverb_et=$reverb/REVERB_WSJCAM0_et +export reverb_real_dt=$reverb/MC_WSJ_AV_Dev +export reverb_real_et=$reverb/MC_WSJ_AV_Eval # set the directory of the multi-condition training data to be generated reverb_tr=`pwd`/data_tr_cut/REVERB_WSJCAM0_tr_cut @@ -82,11 +74,16 @@ nj_decode=8 # set to true if you want the tri2a systems (re-implementation of the HTK baselines) do_tri2a=true -if [ $stage -le 1 ]; then - # Generate multi-condition training data - # Note that utterance lengths match the original set. - # This enables using clean alignments in multi-condition training (stereo training) - local/REVERB_create_mcdata.sh $wsjcam0 $reverb_tr +./local/check_tools.sh || exit 1 + +if [ ${stage} -le 1 ]; then + ### Task dependent. You have to make the following data preparation part by yourself. + ### But you can utilize Kaldi recipes in most cases + wavdir=$PWD/wav + echo "stage 0: Data preparation" + local/generate_data.sh --wavdir ${wavdir} ${wsjcam0} + local/prepare_simu_data.sh --wavdir ${wavdir} ${reverb} ${wsjcam0} + local/prepare_real_data.sh ${reverb} fi if [ $stage -le 2 ]; then @@ -97,6 +94,7 @@ if [ $stage -le 2 ]; then local/wsj_prepare_beep_dict.sh # Prepare wordlists, etc. + utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang # Prepare directory structure for clean data. Apply some language model fixes. @@ -111,17 +109,17 @@ if [ $stage -le 2 ]; then # local/REVERB_wsjcam0_data_prep.sh /path/to/processed/REVERB_WSJCAM0_dt processed_REVERB_dt dt # The first argument is supposed to point to a folder that has the same structure # as the REVERB corpus. 
- local/REVERB_wsjcam0_data_prep.sh $reverb_tr REVERB_tr_cut tr - local/REVERB_wsjcam0_data_prep.sh $reverb_dt REVERB_dt dt - local/REVERB_wsjcam0_data_prep.sh $reverb_et REVERB_et et - - # Prepare the REVERB "real" dt set from MCWSJAV corpus. - # This corpus is *never* used for training. - # This creates the data set called REVERB_Real_dt and its subfolders - local/REVERB_mcwsjav_data_prep.sh $reverb_real_dt REVERB_Real_dt dt - # The MLF file exists only once in the corpus, namely in the real_dt directory - # so we pass it as 4th argument - local/REVERB_mcwsjav_data_prep.sh $reverb_real_et REVERB_Real_et et $reverb_real_dt/mlf/WSJ.mlf + # local/REVERB_wsjcam0_data_prep.sh $reverb_tr REVERB_tr_cut tr + # local/REVERB_wsjcam0_data_prep.sh $reverb_dt REVERB_dt dt + # local/REVERB_wsjcam0_data_prep.sh $reverb_et REVERB_et et + + # # Prepare the REVERB "real" dt set from MCWSJAV corpus. + # # This corpus is *never* used for training. + # # This creates the data set called REVERB_Real_dt and its subfolders + # local/REVERB_mcwsjav_data_prep.sh $reverb_real_dt REVERB_Real_dt dt + # # The MLF file exists only once in the corpus, namely in the real_dt directory + # # so we pass it as 4th argument + # local/REVERB_mcwsjav_data_prep.sh $reverb_real_et REVERB_Real_et et $reverb_real_dt/mlf/WSJ.mlf fi if [ $stage -le 3 ]; then @@ -129,7 +127,7 @@ if [ $stage -le 3 ]; then # For the non-clean data sets, this is outsourced to the data preparation scripts. mfccdir=mfcc ### for x in si_tr si_dt; do it seems that the number of transcriptions of si_dt is not correct. - for x in si_tr; do + for x in dt_real_8ch dt_simu_8ch et_real_8ch et_simu_8ch; do # tr_simu_8ch steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj_train \ data/$x exp/make_mfcc/$x $mfccdir steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir @@ -140,22 +138,22 @@ if [ $stage -le 4 ]; then # Train monophone model on clean data (si_tr). 
echo "### TRAINING mono0a ###" steps/train_mono.sh --boost-silence 1.25 --nj $nj_train --cmd "$train_cmd" \ - data/si_tr data/lang exp/mono0a + data/tr_simu_8ch data/lang exp/mono0a # Align monophones with clean data. echo "### ALIGNING mono0a_ali ###" steps/align_si.sh --boost-silence 1.25 --nj $nj_train --cmd "$train_cmd" \ - data/si_tr data/lang exp/mono0a exp/mono0a_ali + data/tr_simu_8ch data/lang exp/mono0a exp/mono0a_ali # Create first triphone recognizer. echo "### TRAINING tri1 ###" steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ - 2000 10000 data/si_tr data/lang exp/mono0a_ali exp/tri1 + 2000 10000 data/tr_simu_8ch data/lang exp/mono0a_ali exp/tri1 echo "### ALIGNING tri1_ali ###" # Re-align triphones. steps/align_si.sh --nj $nj_train --cmd "$train_cmd" \ - data/si_tr data/lang exp/tri1 exp/tri1_ali + data/tr_simu_8ch data/lang exp/tri1 exp/tri1_ali fi # The following code trains and evaluates a delta feature recognizer, which is similar to the HTK @@ -164,48 +162,47 @@ if $do_tri2a; then if [ $stage -le 5 ]; then # Train tri2a, which is deltas + delta-deltas, on clean data. steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 15000 data/si_tr data/lang exp/tri1_ali exp/tri2a + 2500 15000 data/tr_simu_8ch data/lang exp/tri1_ali exp/tri2a # Re-align triphones using clean data. This gives a smallish performance gain. steps/align_si.sh --nj $nj_train --cmd "$train_cmd" \ - data/si_tr data/lang exp/tri2a exp/tri2a_ali + data/tr_simu_8ch data/lang exp/tri2a exp/tri2a_ali # Train a multi-condition triphone recognizer. # This uses alignments on *clean* data, which is allowed for REVERB. # However, we have to use the "cut" version so that the length of the # waveforms match. # It is actually asserted by the Challenge that clean and multi-condition waves are aligned. 
- steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 15000 data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri2a_ali exp/tri2a_mc + # steps/train_deltas.sh --cmd "$train_cmd" \ + # 2500 15000 data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri2a_ali exp/tri2a_mc # Prepare clean and mc tri2a models for decoding. - utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg_5k & - utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a_mc exp/tri2a_mc/graph_bg_5k & - wait + utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg_5k + # utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a_mc exp/tri2a_mc/graph_bg_5k & fi if [ $stage -le 6 ]; then # decode REVERB dt using tri2a, clean - for dataset in data/REVERB_*{dt,et}/*; do + for dataset in data/{dt,et}*_8ch; do steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ exp/tri2a/graph_bg_5k $dataset exp/tri2a/decode_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & done # decode REVERB dt using tri2a, mc - for dataset in data/REVERB_*{dt,et}/*; do - steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ - exp/tri2a_mc/graph_bg_5k $dataset exp/tri2a_mc/decode_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & - done + # for dataset in data/{dt,et}*_8ch; do + # steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ + # exp/tri2a_mc/graph_bg_5k $dataset exp/tri2a_mc/decode_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & + # done # basis fMLLR for tri2a_mc system # This computes a transform for every training utterance and computes a basis from that. - steps/get_fmllr_basis.sh --cmd "$train_cmd" --per-utt true data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri2a_mc + # steps/get_fmllr_basis.sh --cmd "$train_cmd" --per-utt true data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri2a_mc # Recognition using fMLLR adaptation (per-utterance processing). 
- for dataset in data/REVERB_*{dt,et}/*; do - steps/decode_basis_fmllr.sh --nj $nj_decode --cmd "$decode_cmd" \ - exp/tri2a_mc/graph_bg_5k $dataset exp/tri2a_mc/decode_basis_fmllr_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & - done + # for dataset in data/{dt,et}*_8ch; do + # steps/decode_basis_fmllr.sh --nj $nj_decode --cmd "$decode_cmd" \ + # exp/tri2a_mc/graph_bg_5k $dataset exp/tri2a_mc/decode_basis_fmllr_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & + # done wait fi fi @@ -215,24 +212,24 @@ if [ $stage -le 7 ]; then echo "### TRAINING tri2b ###" steps/train_lda_mllt.sh --cmd "$train_cmd" \ --splice-opts "--left-context=$context_size --right-context=$context_size" \ - 2500 15000 data/si_tr data/lang exp/tri1_ali exp/tri2b + 2500 15000 data/tr_simu_8ch data/lang exp/tri1_ali exp/tri2b # tri2b (LDA-MLLT system) with multi-condition training, using default parameters. - echo "### TRAINING tri2b_mc ###" - steps/train_lda_mllt.sh --cmd "$train_cmd"\ - --splice-opts "--left-context=$context_size --right-context=$context_size" \ - 2500 15000 data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri1_ali exp/tri2b_mc + # echo "### TRAINING tri2b_mc ###" + # steps/train_lda_mllt.sh --cmd "$train_cmd"\ + # --splice-opts "--left-context=$context_size --right-context=$context_size" \ + # 2500 15000 data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri1_ali exp/tri2b_mc fi # Prepare tri2b* systems for decoding. 
if [ $stage -le 8 ]; then echo "### MAKING GRAPH {tri2b,tri2b_mc}/graph_$lm ###" - for recog in tri2b tri2b_mc; do + for recog in tri2b; do #tri2b_mc utils/mkgraph.sh data/lang_test_$lm exp/$recog exp/$recog/graph_$lm & done wait fi - +exit 1 # discriminative training on top of multi-condition systems # one could also add tri2b here to have a DT clean recognizer for reference if [ $stage -le 9 ]; then From 2bc97b9de6cf1de4d9e4e569b6afe4ff04c5ff0b Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Thu, 13 Sep 2018 01:45:24 -0400 Subject: [PATCH 02/39] add files for TDNN training and modify some files --- egs/reverb/s5/conf/mfcc_hires.conf | 10 + egs/reverb/s5/conf/online_cmvn.conf | 1 + egs/reverb/s5/local/calc_wer.sh | 55 --- egs/reverb/s5/local/chain/compare_wer.sh | 131 +++++++ egs/reverb/s5/local/chain/run_tdnn.sh | 1 + .../s5/local/chain/tuning/run_tdnn_1a.sh | 281 +++++++++++++++ .../s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 297 +++++++++++++++ egs/reverb/s5/local/nnet3/compare_wer.sh | 132 +++++++ .../s5/local/nnet3/run_ivector_common.sh | 149 ++++++++ egs/reverb/s5/local/train_lms_srilm.sh | 261 ++++++++++++++ egs/reverb/s5/run.sh | 339 ++++++++---------- 11 files changed, 1408 insertions(+), 249 deletions(-) create mode 100644 egs/reverb/s5/conf/mfcc_hires.conf create mode 100644 egs/reverb/s5/conf/online_cmvn.conf delete mode 100755 egs/reverb/s5/local/calc_wer.sh create mode 100755 egs/reverb/s5/local/chain/compare_wer.sh create mode 120000 egs/reverb/s5/local/chain/run_tdnn.sh create mode 100755 egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh create mode 100755 egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/reverb/s5/local/nnet3/compare_wer.sh create mode 100755 egs/reverb/s5/local/nnet3/run_ivector_common.sh create mode 100755 egs/reverb/s5/local/train_lms_srilm.sh diff --git a/egs/reverb/s5/conf/mfcc_hires.conf b/egs/reverb/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..fd64b62eb16 --- /dev/null +++ 
b/egs/reverb/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=16000 +--num-mel-bins=40 +--num-ceps=40 +--low-freq=40 +--high-freq=-400 diff --git a/egs/reverb/s5/conf/online_cmvn.conf b/egs/reverb/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/reverb/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/reverb/s5/local/calc_wer.sh b/egs/reverb/s5/local/calc_wer.sh deleted file mode 100755 index c4b5eeb87f3..00000000000 --- a/egs/reverb/s5/local/calc_wer.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -# Copyright 2016 MERL (author: Shinji Watanabe) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -. ./cmd.sh -. ./path.sh - -lmw=15 -am="tri2a" -lm="bg_5k" -decode="" - -. utils/parse_options.sh - -if [ ! 
-z $decode ]; then - decode="_$decode" -fi - -dir="exp/$am/decode${decode}_${lm}_REVERB_" -echo "####################" -echo "${dir}*dt*" -for a in `echo ${dir}*dt* | tr " " "\n" | grep -v "A\.si"`; do - echo $a | awk -F '_' '{for(i=NF-6;i [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat 
$dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/reverb/s5/local/chain/run_tdnn.sh b/egs/reverb/s5/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/reverb/s5/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..61cc8b97d41 --- /dev/null +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,281 @@ +#!/bin/bash + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of 
which are also used in this script directly). +stage=0 +nj=96 +train_set=tr_simu_8ch +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch" +gmm=tri3 +nnet3_affix=_tr_simu_8ch +lm_suffix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a # affix for the TDNN directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj ${nj} --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + opts="l2-regularize=0.05" + output_opts="l2-regularize=0.01 bottleneck-dim=320" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=512 + relu-batchnorm-layer name=tdnn2 $opts dim=512 input=Append(-1,0,1) + relu-batchnorm-layer name=tdnn3 $opts dim=512 + relu-batchnorm-layer name=tdnn4 $opts dim=512 input=Append(-1,0,1) + relu-batchnorm-layer name=tdnn5 $opts dim=512 + relu-batchnorm-layer name=tdnn6 $opts dim=512 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn7 $opts dim=512 input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn8 $opts dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain $opts dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false $output_opts dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... 
this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn8 $opts dim=512 target-rms=0.5 + output-layer name=output-xent $output_opts dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/chime5-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=10 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=256,128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + 
--egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + $tree_dir $tree_dir/graph${lm_suffix} || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj 8 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ + $tree_dir/graph${lm_suffix} data/${data}_hires ${dir}/decode${lm_suffix}_${data} || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + +# Not testing the 'looped' decoding separately, because for +# TDNN systems it would give exactly the same results as the +# normal decoding. + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. 
+ steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l $lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj ${nj} --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=40" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + 
relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim + + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm4 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/chime5-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + mkdir -p $dir/egs + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$train_cmd --mem 4G" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs $num_epochs \ + --trainer.srand=$srand \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=16 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.momentum=0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts="--frames-overlap-per-eg 0" \ + 
--egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + $tree_dir $tree_dir/graph${lm_suffix} || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj 8 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ + $tree_dir/graph${lm_suffix} data/${data}_hires ${dir}/decode${lm_suffix}_${data} || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + +# Not testing the 'looped' decoding separately, because for +# TDNN systems it would give exactly the same results as the +# normal decoding. + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. 
+ steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat 
$dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/reverb/s5/local/nnet3/run_ivector_common.sh b/egs/reverb/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..e28e5ce996d --- /dev/null +++ b/egs/reverb/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,149 @@ +#!/bin/bash + +set -euo pipefail + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. 
+ +stage=0 +train_set=train_worn_u100k +test_sets="dev_worn dev_beamformit_ref" +gmm=tri3 +nj=96 + +nnet3_affix=_train_worn_u100k + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment. _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj ${nj} --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/mfcc/chime5-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. 
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l " +# End configuration section + +echo "$0 $@" + +[ -f path.sh ] && . ./path.sh +. ./utils/parse_options.sh || exit 1 + +echo "-------------------------------------" +echo "Building an SRILM language model " +echo "-------------------------------------" + +if [ $# -ne 2 ] ; then + echo "Incorrect number of parameters. " + echo "Script has to be called like this:" + echo " $0 [switches] " + echo "For example: " + echo " $0 data data/srilm" + echo "The allowed switches are: " + echo " words_file= word list file -- data/lang/words.txt by default" + echo " train_text= data/train/text is used in case when not specified" + echo " dev_text= last 10 % of the train text is used by default" + echo " oov_symbol=> symbol to use for oov modeling -- by default" + exit 1 +fi + +datadir=$1 +tgtdir=$2 + +##End of configuration +loc=`which ngram-count`; +if [ -z $loc ]; then + echo >&2 "You appear to not have SRILM tools installed, either on your path," + echo >&2 "Use the script \$KALDI_ROOT/tools/install_srilm.sh to install it." + exit 1 +fi + +# Prepare the destination directory +mkdir -p $tgtdir + +for f in $words_file $train_text $dev_text; do + [ ! -s $f ] && echo "No such file $f" && exit 1; +done + +[ -z $words_file ] && words_file=$datadir/lang/words.txt +if [ ! 
-z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=`cat $train_text | wc -l` + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using words file: $words_file" + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +elif [ ! -z "$train_text" ] && [ ! -z "$dev_text" ] ; then + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + train_text=$train_text + dev_text=$dev_text +else + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + +fi + +[ ! -f $words_file ] && echo >&2 "File $words_file must exist!" && exit 1 +[ ! -f $train_text ] && echo >&2 "File $train_text must exist!" && exit 1 +[ ! -f $dev_text ] && echo >&2 "File $dev_text must exist!" && exit 1 + + +# Extract the word list from the training dictionary; exclude special symbols +sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab +if (($?)); then + echo "Failed to create vocab from $words_file" + exit 1 +else + # wc vocab # doesn't work due to some encoding issues + echo vocab contains `cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +# We also have to avoid skewing the LM by incorporating the same sentences +# from different channels +sed -e "s/\.CH.//" -e "s/_.\-./_/" -e "s/NOLOCATION\(\.[LR]\)*-//" -e "s/U[0-9][0-9]_//" $train_text | sort -u | \ + perl -ane 'print join(" ", @F[1..$#F]) . 
"\n" if @F > 1' > $tgtdir/train.txt +if (($?)); then + echo "Failed to create $tgtdir/train.txt from $train_text" + exit 1 +else + echo "Removed first word (uid) from every line of $train_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +sed -e "s/\.CH.//" -e "s/_.\-./_/" $dev_text | sort -u | \ + perl -ane 'print join(" ", @F[1..$#F]) . "\n" if @F > 1' > $tgtdir/dev.txt +if (($?)); then + echo "Failed to create $tgtdir/dev.txt from $dev_text" + exit 1 +else + echo "Removed first word (uid) from every line of $dev_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + + +echo "-------------------" +echo "Good-Turing 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo 
"-------------------" +echo "Kneser-Ney 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn111.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn112.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn122.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn123.gz -kndiscount1 -gt1min 1 \ + -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + + +echo "-------------------" +echo "Good-Turing 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.gt0111.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 
4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz \ + -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz \ + -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz \ + -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz \ + -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz \ + -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.kn0111.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 
-gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0223.gz \ + -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ + -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +if [ ! -z ${LIBLBFGS} ]; then + #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault + #instead of that, we simply output the model in the maxent format and convert it using the "ngram" + echo "-------------------" + echo "Maxent 3grams" + echo "-------------------" + sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ + ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ + ngram -lm - -order 3 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ + sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1 + + echo "-------------------" + echo "Maxent 4grams" + echo "-------------------" + sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ + ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ + ngram -lm - -order 4 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ + sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1 +else + echo >&2 "SRILM is not compiled with the support of MaxEnt models." 
+ echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" + echo >&2 "which will take care of compiling the SRILM with MaxEnt support" + exit 1; +fi + + +echo "--------------------" +echo "Computing perplexity" +echo "--------------------" +( + for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done + for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done +) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt + +echo "The perlexity scores report is stored in $tgtdir/perplexities.txt " +echo "" + +for best_ngram in {3,4}gram ; do + outlm=best_${best_ngram}.gz + lmfilename=$(grep "${best_ngram}" $tgtdir/perplexities.txt | head -n 1 | cut -f 1 -d ' ') + echo "$outlm -> $lmfilename" + (cd $tgtdir; rm -f $outlm; ln -sf $(basename $lmfilename) $outlm ) +done diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 36225902cb7..fe35430163e 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -30,10 +30,13 @@ if [ ! `which matlab` ]; then exit 1 fi +./local/check_tools.sh || exit 1 + . ./cmd.sh . ./path.sh stage=0 + . utils/parse_options.sh # Set bash to 'debug' mode, it prints the commands (option '-x') and exits on : # -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline', @@ -51,13 +54,10 @@ if [[ $(hostname -f) == *.clsp.jhu.edu ]] ; then else echo "Set the data directory locations." 
&& exit 1; fi -export reverb_dt=$reverb/REVERB_WSJCAM0_dt -export reverb_et=$reverb/REVERB_WSJCAM0_et -export reverb_real_dt=$reverb/MC_WSJ_AV_Dev -export reverb_real_et=$reverb/MC_WSJ_AV_Eval -# set the directory of the multi-condition training data to be generated -reverb_tr=`pwd`/data_tr_cut/REVERB_WSJCAM0_tr_cut +#training set and test set +train_set=tr_simu_8ch +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch" # LDA context size (left/right) (4 is default) context_size=4 @@ -66,7 +66,8 @@ context_size=4 lm="tg_5k" # number of jobs for feature extraction and model training -nj_train=30 +nj=92 +decode_nj=20 # number of jobs for decoding nj_decode=8 @@ -74,8 +75,6 @@ nj_decode=8 # set to true if you want the tri2a systems (re-implementation of the HTK baselines) do_tri2a=true -./local/check_tools.sh || exit 1 - if [ ${stage} -le 1 ]; then ### Task dependent. You have to make the following data preparation part by yourself. ### But you can utilize Kaldi recipes in most cases @@ -88,17 +87,27 @@ fi if [ $stage -le 2 ]; then # Prepare wsjcam0 clean data and wsj0 language model. - local/wsjcam0_data_prep.sh $wsjcam0 $wsj0 + # local/wsjcam0_data_prep.sh $wsjcam0 $wsj0 - # Prepare merged BEEP/CMU dictionary. - local/wsj_prepare_beep_dict.sh + # # Prepare merged BEEP/CMU dictionary. + # local/wsj_prepare_beep_dict.sh - # Prepare wordlists, etc. + # # Prepare wordlists, etc. - utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang - - # Prepare directory structure for clean data. Apply some language model fixes. - local/wsjcam0_format_data.sh + # utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang + + # # Prepare directory structure for clean data. Apply some language model fixes. 
+ # local/wsjcam0_format_data.sh + + local/train_lms_srilm.sh \ + --train-text data/${train_set}/text --dev-text data/dt_simu_8ch/text \ + --oov-symbol "" --words-file data/lang/words.txt \ + data/ data/srilm + + LM=data/srilm/best_3gram.gz + # Compiles G for reverb trigram LM + utils/format_lm.sh \ + data/lang $LM data/local/dict/lexicon.txt data/lang # Now it's getting more interesting. # Prepare the multi-condition training data and the REVERB dt set. @@ -123,211 +132,153 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then + for dset in ${train_set} ${test_sets}; do + utils/copy_data_dir.sh data/${dset} data/${dset}_nosplit + utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_nosplit data/${dset} + done +fi + +if [ $stage -le 4 ]; then # Extract MFCC features for clean sets. # For the non-clean data sets, this is outsourced to the data preparation scripts. mfccdir=mfcc ### for x in si_tr si_dt; do it seems that the number of transcriptions of si_dt is not correct. - for x in dt_real_8ch dt_simu_8ch et_real_8ch et_simu_8ch; do # tr_simu_8ch - steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj_train \ + for x in ${train_set} ${test_sets}; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 \ data/$x exp/make_mfcc/$x $mfccdir steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir done fi -if [ $stage -le 4 ]; then - # Train monophone model on clean data (si_tr). - echo "### TRAINING mono0a ###" - steps/train_mono.sh --boost-silence 1.25 --nj $nj_train --cmd "$train_cmd" \ - data/tr_simu_8ch data/lang exp/mono0a - - # Align monophones with clean data. - echo "### ALIGNING mono0a_ali ###" - steps/align_si.sh --boost-silence 1.25 --nj $nj_train --cmd "$train_cmd" \ - data/tr_simu_8ch data/lang exp/mono0a exp/mono0a_ali - - # Create first triphone recognizer. 
- echo "### TRAINING tri1 ###" - steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ - 2000 10000 data/tr_simu_8ch data/lang exp/mono0a_ali exp/tri1 - - echo "### ALIGNING tri1_ali ###" - # Re-align triphones. - steps/align_si.sh --nj $nj_train --cmd "$train_cmd" \ - data/tr_simu_8ch data/lang exp/tri1 exp/tri1_ali -fi +# if [ $stage -le 5 ]; then + # make a subset for monophone training + #utils/subset_data_dir.sh --shortest data/${train_set} 30000 data/${train_set}_30kshort + #utils/subset_data_dir.sh data/${train_set}_10kshort 4000 data/${train_set}_4kshort +# fi -# The following code trains and evaluates a delta feature recognizer, which is similar to the HTK -# baseline (but using per-utterance basis fMLLR instead of batch MLLR). This is for reference only. -if $do_tri2a; then -if [ $stage -le 5 ]; then - # Train tri2a, which is deltas + delta-deltas, on clean data. - steps/train_deltas.sh --cmd "$train_cmd" \ - 2500 15000 data/tr_simu_8ch data/lang exp/tri1_ali exp/tri2a - - # Re-align triphones using clean data. This gives a smallish performance gain. - steps/align_si.sh --nj $nj_train --cmd "$train_cmd" \ - data/tr_simu_8ch data/lang exp/tri2a exp/tri2a_ali - - # Train a multi-condition triphone recognizer. - # This uses alignments on *clean* data, which is allowed for REVERB. - # However, we have to use the "cut" version so that the length of the - # waveforms match. - # It is actually asserted by the Challenge that clean and multi-condition waves are aligned. - # steps/train_deltas.sh --cmd "$train_cmd" \ - # 2500 15000 data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri2a_ali exp/tri2a_mc - - # Prepare clean and mc tri2a models for decoding. 
- utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg_5k - # utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a_mc exp/tri2a_mc/graph_bg_5k & +if [ $stage -le 6 ]; then + # Starting basic training on MFCC features + steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/mono fi -if [ $stage -le 6 ]; then - # decode REVERB dt using tri2a, clean - for dataset in data/{dt,et}*_8ch; do - steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ - exp/tri2a/graph_bg_5k $dataset exp/tri2a/decode_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & - done +if [ $stage -le 7 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/mono exp/mono_ali - # decode REVERB dt using tri2a, mc - # for dataset in data/{dt,et}*_8ch; do - # steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ - # exp/tri2a_mc/graph_bg_5k $dataset exp/tri2a_mc/decode_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & - # done - - # basis fMLLR for tri2a_mc system - # This computes a transform for every training utterance and computes a basis from that. - # steps/get_fmllr_basis.sh --cmd "$train_cmd" --per-utt true data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri2a_mc - - # Recognition using fMLLR adaptation (per-utterance processing). - # for dataset in data/{dt,et}*_8ch; do - # steps/decode_basis_fmllr.sh --nj $nj_decode --cmd "$decode_cmd" \ - # exp/tri2a_mc/graph_bg_5k $dataset exp/tri2a_mc/decode_basis_fmllr_bg_5k_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` & - # done - wait -fi + steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/${train_set} data/lang exp/mono_ali exp/tri1 fi -if [ $stage -le 7 ]; then - # Train tri2b recognizer, which uses LDA-MLLT, using the default parameters from the WSJ recipe. 
- echo "### TRAINING tri2b ###" +if [ $stage -le 8 ]; then + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/tri1 exp/tri1_ali + steps/train_lda_mllt.sh --cmd "$train_cmd" \ - --splice-opts "--left-context=$context_size --right-context=$context_size" \ - 2500 15000 data/tr_simu_8ch data/lang exp/tri1_ali exp/tri2b - - # tri2b (LDA-MLLT system) with multi-condition training, using default parameters. - # echo "### TRAINING tri2b_mc ###" - # steps/train_lda_mllt.sh --cmd "$train_cmd"\ - # --splice-opts "--left-context=$context_size --right-context=$context_size" \ - # 2500 15000 data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/tri1_ali exp/tri2b_mc + 4000 50000 data/${train_set} data/lang exp/tri1_ali exp/tri2 fi -# Prepare tri2b* systems for decoding. -if [ $stage -le 8 ]; then - echo "### MAKING GRAPH {tri2b,tri2b_mc}/graph_$lm ###" - for recog in tri2b; do #tri2b_mc - utils/mkgraph.sh data/lang_test_$lm exp/$recog exp/$recog/graph_$lm & +if [ $stage -le 9 ]; then + utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph + for dset in ${test_sets}; do + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + exp/tri2/graph data/${dset} exp/tri2/decode_${dset} & done wait fi -exit 1 -# discriminative training on top of multi-condition systems -# one could also add tri2b here to have a DT clean recognizer for reference -if [ $stage -le 9 ]; then - base_recog=tri2b_mc - bmmi_recog=${base_recog}_mmi_b0.1 - echo "### DT $base_recog --> $bmmi_recog ###" - - # get alignments from base recognizer - steps/align_si.sh --nj $nj_train --cmd "$train_cmd" \ - --use-graphs true data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/$base_recog exp/${base_recog}_ali - - # get lattices from base recognizer - denlats_dir=${base_recog}_denlats - subsplit=`echo $nj_train \* 2 | bc` - # DT with multi-condition data ... 
- steps/make_denlats.sh --sub-split $subsplit --nj $nj_train --cmd "$decode_cmd" \ - data/REVERB_tr_cut/SimData_tr_for_1ch_A data/lang exp/$base_recog exp/$denlats_dir - - # boosted MMI training - steps/train_mmi.sh --boost 0.1 --cmd "$train_cmd" \ - data/REVERB_tr_cut/SimData_tr_for_1ch_A \ - data/lang \ - exp/${base_recog}_ali \ - exp/$denlats_dir \ - exp/$bmmi_recog - cp exp/$base_recog/ali.* exp/$bmmi_recog -fi -# decoding using various recognizers if [ $stage -le 10 ]; then - # put tri2b last since it takes longest due to the large mismatch. - for recog in tri2b_mc tri2b_mc_mmi_b0.1 tri2b; do - # The graph from the ML directory is used in recipe - recog2=`echo $recog | sed s/_mmi.*//` - graph=exp/$recog2/graph_$lm - - echo "### DECODING with $recog, noadapt, $lm ###" - for dataset in data/REVERB_*{dt,et}/*; do - decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ - $graph $dataset \ - exp/$recog/decode_$decode_suff & - done - wait - - echo " ## MBR RESCORING with $recog, noadapt ##" - for dataset in data/REVERB_*{dt,et}/*; do - decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - mkdir -p exp/$recog/decode_mbr_$decode_suff - cp exp/$recog/decode_$decode_suff/lat.*.gz exp/$recog/decode_mbr_$decode_suff - local/score_mbr.sh --cmd "$decode_cmd" \ - $dataset data/lang_test_$lm/ exp/$recog/decode_mbr_$decode_suff & - done - wait - - done # loop recog + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set} data/lang exp/tri2 exp/tri2_ali + + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 data/${train_set} data/lang exp/tri2_ali exp/tri3 fi -# decoding using various recognizers with adaptation if [ $stage -le 11 ]; then - # put tri2b last since it takes longest due to the large mismatch. 
- for recog in tri2b_mc tri2b_mc_mmi_b0.1 tri2b; do - # The graph from the ML directory is used in recipe - recog2=`echo $recog | sed s/_mmi.*//` - graph=exp/$recog2/graph_$lm - - # set the adaptation data - if [[ "$recog" =~ _mc ]]; then - tr_dataset=REVERB_tr_cut/SimData_tr_for_1ch_A - else - tr_dataset=si_tr - fi - - echo "### DECODING with $recog, basis_fmllr, $lm ###" - steps/get_fmllr_basis.sh --cmd "$train_cmd" --per-utt true data/$tr_dataset data/lang exp/$recog - for dataset in data/REVERB_*{dt,et}/*; do - ( - decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - steps/decode_basis_fmllr.sh --nj $nj_decode --cmd "$decode_cmd" \ - $graph $dataset \ - exp/$recog/decode_basis_fmllr_$decode_suff - ) & - done - wait - - echo " ## MBR RESCORING with $recog, basis_fmllr ##" - for dataset in data/REVERB_*{dt,et}/*; do - decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - mkdir -p exp/$recog/decode_mbr_basis_fmllr_$decode_suff - cp exp/$recog/decode_basis_fmllr_$decode_suff/lat.*.gz exp/$recog/decode_mbr_basis_fmllr_$decode_suff - local/score_mbr.sh --cmd "$decode_cmd" \ - $dataset data/lang_test_$lm/ exp/$recog/decode_mbr_basis_fmllr_$decode_suff & - done - wait - - done # loop recog + utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph + for dset in ${test_sets}; do + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + exp/tri3/graph data/${dset} exp/tri3/decode_${dset} & + done + wait fi +if [ $stage -le 12 ]; then + # chain TDNN + local/chain/run_tdnn.sh --nj ${nj} --train-set ${train_set} --test-sets "$test_sets" --gmm tri3 --nnet3-affix _${train_set} +fi +exit 1 +# # decoding using various recognizers +# if [ $stage -le 16 ]; then + # # put tri2b last since it takes longest due to the large mismatch. 
+ # for recog in tri2b_mc tri2b_mc_mmi_b0.1 tri2b; do + # # The graph from the ML directory is used in recipe + # recog2=`echo $recog | sed s/_mmi.*//` + # graph=exp/$recog2/graph_$lm + + # echo "### DECODING with $recog, noadapt, $lm ###" + # for dataset in data/REVERB_*{dt,et}/*; do + # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` + # steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ + # $graph $dataset \ + # exp/$recog/decode_$decode_suff & + # done + # wait + + # echo " ## MBR RESCORING with $recog, noadapt ##" + # for dataset in data/REVERB_*{dt,et}/*; do + # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` + # mkdir -p exp/$recog/decode_mbr_$decode_suff + # cp exp/$recog/decode_$decode_suff/lat.*.gz exp/$recog/decode_mbr_$decode_suff + # local/score_mbr.sh --cmd "$decode_cmd" \ + # $dataset data/lang_test_$lm/ exp/$recog/decode_mbr_$decode_suff & + # done + # wait + + # done # loop recog +# fi + +# # decoding using various recognizers with adaptation +# if [ $stage -le 11 ]; then + # # put tri2b last since it takes longest due to the large mismatch. 
+ # for recog in tri2b_mc tri2b_mc_mmi_b0.1 tri2b; do + # # The graph from the ML directory is used in recipe + # recog2=`echo $recog | sed s/_mmi.*//` + # graph=exp/$recog2/graph_$lm + + # # set the adaptation data + # if [[ "$recog" =~ _mc ]]; then + # tr_dataset=REVERB_tr_cut/SimData_tr_for_1ch_A + # else + # tr_dataset=si_tr + # fi + + # echo "### DECODING with $recog, basis_fmllr, $lm ###" + # steps/get_fmllr_basis.sh --cmd "$train_cmd" --per-utt true data/$tr_dataset data/lang exp/$recog + # for dataset in data/REVERB_*{dt,et}/*; do + # ( + # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` + # steps/decode_basis_fmllr.sh --nj $nj_decode --cmd "$decode_cmd" \ + # $graph $dataset \ + # exp/$recog/decode_basis_fmllr_$decode_suff + # ) & + # done + # wait + + # echo " ## MBR RESCORING with $recog, basis_fmllr ##" + # for dataset in data/REVERB_*{dt,et}/*; do + # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` + # mkdir -p exp/$recog/decode_mbr_basis_fmllr_$decode_suff + # cp exp/$recog/decode_basis_fmllr_$decode_suff/lat.*.gz exp/$recog/decode_mbr_basis_fmllr_$decode_suff + # local/score_mbr.sh --cmd "$decode_cmd" \ + # $dataset data/lang_test_$lm/ exp/$recog/decode_mbr_basis_fmllr_$decode_suff & + # done + # wait + + # done # loop recog +# fi + # get all WERs with lmw=15 if [ $stage -le 12 ]; then local/get_results.sh From d816ad154a7f0eb8d8457f8b38922d63ac374eb2 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Mon, 24 Sep 2018 01:12:34 -0400 Subject: [PATCH 03/39] modified run.sh and add local/score.sh, get_reslts.sh --- egs/reverb/s5/local/get_results.sh | 10 +-- egs/reverb/s5/local/score.sh | 126 +++++++++++++++++++++++++---- egs/reverb/s5/run.sh | 25 +++--- 3 files changed, 127 insertions(+), 34 deletions(-) diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh index 7c74736e5d1..96eb43cb0a3 100755 --- a/egs/reverb/s5/local/get_results.sh +++ b/egs/reverb/s5/local/get_results.sh @@ -7,12 
+7,12 @@ # No No No No BG No local/calc_wer.sh # No No Yes No BG No -local/calc_wer.sh --am tri2a_mc +#local/calc_wer.sh --am tri2a_mc # No Yes Yes No BG No -local/calc_wer.sh --am tri2a_mc --decode basis_fmllr +#local/calc_wer.sh --am tri2a_mc --decode basis_fmllr # Yes Yes Yes No TG No -local/calc_wer.sh --am tri2b_mc --lm tg_5k --decode basis_fmllr +#local/calc_wer.sh --am tri2b_mc --lm tg_5k --decode basis_fmllr # Yes Yes Yes Yes TG No -local/calc_wer.sh --am tri2b_mc_mmi_b0.1 --lm tg_5k --decode basis_fmllr +#local/calc_wer.sh --am tri2b_mc_mmi_b0.1 --lm tg_5k --decode basis_fmllr # Yes Yes Yes Yes TG Yes -local/calc_wer.sh --am tri2b_mc_mmi_b0.1 --lm tg_5k --decode mbr_basis_fmllr +local/calc_wer.sh --am tri3 --lm tg_5k --decode diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh index abd8149a672..9988c941441 100755 --- a/egs/reverb/s5/local/score.sh +++ b/egs/reverb/s5/local/score.sh @@ -1,23 +1,29 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal) # Apache 2.0 +# See the script steps/scoring/score_kaldi_cer.sh in case you need to evalutate CER + [ -f ./path.sh ] && . ./path.sh # begin configuration section. cmd=run.pl stage=0 -decode_mbr=true -word_ins_penalty=0.0 +decode_mbr=false +stats=true +beam=6 +word_ins_penalty=0.0,0.5,1.0 min_lmwt=7 max_lmwt=17 +iter=final #end configuration section. +echo "$0 $@" # Print the command line for logging [ -f ./path.sh ] && . ./path.sh . parse_options.sh || exit 1; if [ $# -ne 3 ]; then - echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " + echo "Usage: $0 [--cmd (run.pl|queue.pl...)] " echo " Options:" echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." echo " --stage (0|1|2) # start scoring script from part-way through." @@ -37,21 +43,107 @@ for f in $symtab $dir/lat.1.gz $data/text; do [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; done -mkdir -p $dir/scoring/log -cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt +ref_filtering_cmd="cat" +[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter" +[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter" +hyp_filtering_cmd="cat" +[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter" +[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter" + + +if $decode_mbr ; then + echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty" +else + echo "$0: scoring with word insertion penalty=$word_ins_penalty" +fi + + +mkdir -p $dir/scoring_kaldi +cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; +if [ $stage -le 0 ]; then + + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + mkdir -p $dir/scoring_kaldi/penalty_$wip/log + + if $decode_mbr ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ + acwt=\`perl -e \"print 1.0/LMWT\"\`\; \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-mbr-decode --word-symbol-table=$symtab \ + ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $symtab \| \ + $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; + + else + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $symtab \| \ + $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; + fi + + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ + 
cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; + + done +fi -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-best-path --word-symbol-table=$symtab \ - ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; -# Note: the double level of quoting for the sed command -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cat $dir/scoring/LMWT.tra \| \ - utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ - compute-wer --text --mode=present \ - ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +if [ $stage -le 1 ]; then + + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for lmwt in $(seq $min_lmwt $max_lmwt); do + # adding /dev/null to the command list below forces grep to output the filename + grep WER $dir/wer_${lmwt}_${wip} /dev/null + done + done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer || exit 1 + + best_wer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_wer) + best_wip=$(echo $best_wer_file | awk -F_ '{print $NF}') + best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-1; print $N}') + + if [ -z "$best_lmwt" ]; then + echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong." 
+ exit 1; + fi + + if $stats; then + mkdir -p $dir/scoring_kaldi/wer_details + echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight + echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty + + $cmd $dir/scoring_kaldi/log/stats1.log \ + cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ + align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ + utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ + utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; + + $cmd $dir/scoring_kaldi/log/stats2.log \ + cat $dir/scoring_kaldi/wer_details/per_utt \| \ + utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \ + sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1; + + $cmd $dir/scoring_kaldi/log/wer_bootci.log \ + compute-wer-bootci --mode=present \ + ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ + '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; + + fi +fi + +# If we got here, the scoring was successful. +# As a small aid to prevent confusion, we remove all wer_{?,??} files; +# these originate from the previous version of the scoring files +# i keep both statement here because it could lead to confusion about +# the capabilities of the script (we don't do cer in the script) +rm $dir/wer_{?,??} 2>/dev/null +rm $dir/cer_{?,??} 2>/dev/null exit 0; diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index fe35430163e..7db9aec1599 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -67,7 +67,7 @@ lm="tg_5k" # number of jobs for feature extraction and model training nj=92 -decode_nj=20 +decode_nj=10 # number of jobs for decoding nj_decode=8 @@ -87,17 +87,17 @@ fi if [ $stage -le 2 ]; then # Prepare wsjcam0 clean data and wsj0 language model. 
- # local/wsjcam0_data_prep.sh $wsjcam0 $wsj0 + local/wsjcam0_data_prep.sh $wsjcam0 $wsj0 - # # Prepare merged BEEP/CMU dictionary. - # local/wsj_prepare_beep_dict.sh + # Prepare merged BEEP/CMU dictionary. + local/wsj_prepare_beep_dict.sh - # # Prepare wordlists, etc. + # Prepare wordlists, etc. - # utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang + utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang - # # Prepare directory structure for clean data. Apply some language model fixes. - # local/wsjcam0_format_data.sh + # Prepare directory structure for clean data. Apply some language model fixes. + local/wsjcam0_format_data.sh local/train_lms_srilm.sh \ --train-text data/${train_set}/text --dev-text data/dt_simu_8ch/text \ @@ -105,7 +105,7 @@ if [ $stage -le 2 ]; then data/ data/srilm LM=data/srilm/best_3gram.gz - # Compiles G for reverb trigram LM + # Compiles G for reverb 3-gram LM utils/format_lm.sh \ data/lang $LM data/local/dict/lexicon.txt data/lang @@ -179,7 +179,7 @@ if [ $stage -le 8 ]; then fi if [ $stage -le 9 ]; then - utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph + utils/mkgraph.sh data/lang_test_$lm exp/tri2 exp/tri2/graph for dset in ${test_sets}; do steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ exp/tri2/graph data/${dset} exp/tri2/decode_${dset} & @@ -196,7 +196,7 @@ if [ $stage -le 10 ]; then fi if [ $stage -le 11 ]; then - utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph + utils/mkgraph.sh data/lang_test_$lm exp/tri3 exp/tri3/graph for dset in ${test_sets}; do steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ exp/tri3/graph data/${dset} exp/tri3/decode_${dset} & @@ -206,7 +206,8 @@ fi if [ $stage -le 12 ]; then # chain TDNN - local/chain/run_tdnn.sh --nj ${nj} --train-set ${train_set} --test-sets "$test_sets" --gmm tri3 --nnet3-affix _${train_set} + local/chain/run_tdnn.sh --nj ${nj} --stage 16 --train-set ${train_set} --test-sets "$test_sets" --gmm 
tri3 --nnet3-affix _${train_set} \ + --lm-suffix _test_$lm fi exit 1 # # decoding using various recognizers From 13ce9d0d17c032b36d75312ba004299ec9680f5a Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Tue, 2 Oct 2018 13:31:41 -0400 Subject: [PATCH 04/39] update and clean up scripts --- egs/reverb/s5/RESULTS | 212 +++++------------- egs/reverb/s5/local/chain/run_tdnn_lstm.sh | 1 + .../s5/local/chain/tuning/run_tdnn_1a.sh | 1 + .../s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 12 +- egs/reverb/s5/local/get_results.sh | 35 +-- egs/reverb/s5/local/score.sh | 94 ++++---- egs/reverb/s5/run.sh | 117 +--------- 7 files changed, 144 insertions(+), 328 deletions(-) create mode 120000 egs/reverb/s5/local/chain/run_tdnn_lstm.sh diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS index 3537852a827..589740c1507 100644 --- a/egs/reverb/s5/RESULTS +++ b/egs/reverb/s5/RESULTS @@ -1,150 +1,62 @@ -#################### -exp/tri2a/decode_bg_5k_REVERB_*dt* -RealData_dt_for_1ch_far_room1_A 89.13 -RealData_dt_for_1ch_near_room1_A 90.27 -SimData_dt_for_1ch_far_room1_A 22.44 -SimData_dt_for_1ch_far_room2_A 88.44 -SimData_dt_for_1ch_far_room3_A 91.27 -SimData_dt_for_1ch_near_room1_A 12.19 -SimData_dt_for_1ch_near_room2_A 42.74 -SimData_dt_for_1ch_near_room3_A 49.31 -Avg_Real(2) 89.70 -Avg_Sim(6) 51.06 - -exp/tri2a/decode_bg_5k_REVERB_*et* -RealData_et_for_1ch_far_room1_A 88.45 -RealData_et_for_1ch_near_room1_A 88.66 -SimData_et_for_1ch_far_room1_A 22.72 -SimData_et_for_1ch_far_room2_A 81.53 -SimData_et_for_1ch_far_room3_A 89.25 -SimData_et_for_1ch_near_room1_A 14.37 -SimData_et_for_1ch_near_room2_A 40.46 -SimData_et_for_1ch_near_room3_A 51.50 -Avg_Real(2) 88.56 -Avg_Sim(6) 49.97 - -#################### -exp/tri2a_mc/decode_bg_5k_REVERB_*dt* -RealData_dt_for_1ch_far_room1_A 53.38 -RealData_dt_for_1ch_near_room1_A 56.27 -SimData_dt_for_1ch_far_room1_A 16.96 -SimData_dt_for_1ch_far_room2_A 44.15 -SimData_dt_for_1ch_far_room3_A 49.88 -SimData_dt_for_1ch_near_room1_A 15.00 
-SimData_dt_for_1ch_near_room2_A 21.81 -SimData_dt_for_1ch_near_room3_A 25.10 -Avg_Real(2) 54.83 -Avg_Sim(6) 28.82 - -exp/tri2a_mc/decode_bg_5k_REVERB_*et* -RealData_et_for_1ch_far_room1_A 52.94 -RealData_et_for_1ch_near_room1_A 55.35 -SimData_et_for_1ch_far_room1_A 18.91 -SimData_et_for_1ch_far_room2_A 37.33 -SimData_et_for_1ch_far_room3_A 46.69 -SimData_et_for_1ch_near_room1_A 17.77 -SimData_et_for_1ch_near_room2_A 21.23 -SimData_et_for_1ch_near_room3_A 26.17 -Avg_Real(2) 54.14 -Avg_Sim(6) 28.02 - -#################### -exp/tri2a_mc/decode_basis_fmllr_bg_5k_REVERB_*dt* -RealData_dt_for_1ch_far_room1_A 46.27 -RealData_dt_for_1ch_near_room1_A 48.85 -SimData_dt_for_1ch_far_room1_A 15.59 -SimData_dt_for_1ch_far_room2_A 35.86 -SimData_dt_for_1ch_far_room3_A 39.54 -SimData_dt_for_1ch_near_room1_A 12.78 -SimData_dt_for_1ch_near_room2_A 17.75 -SimData_dt_for_1ch_near_room3_A 20.23 -Avg_Real(2) 47.56 -Avg_Sim(6) 23.62 - -exp/tri2a_mc/decode_basis_fmllr_bg_5k_REVERB_*et* -RealData_et_for_1ch_far_room1_A 48.11 -RealData_et_for_1ch_near_room1_A 48.42 -SimData_et_for_1ch_far_room1_A 16.57 -SimData_et_for_1ch_far_room2_A 31.54 -SimData_et_for_1ch_far_room3_A 39.32 -SimData_et_for_1ch_near_room1_A 14.31 -SimData_et_for_1ch_near_room2_A 18.42 -SimData_et_for_1ch_near_room3_A 21.03 -Avg_Real(2) 48.27 -Avg_Sim(6) 23.53 - -#################### -exp/tri2b_mc/decode_basis_fmllr_tg_5k_REVERB_*dt* -RealData_dt_for_1ch_far_room1_A 34.04 -RealData_dt_for_1ch_near_room1_A 33.37 -SimData_dt_for_1ch_far_room1_A 10.57 -SimData_dt_for_1ch_far_room2_A 22.63 -SimData_dt_for_1ch_far_room3_A 25.00 -SimData_dt_for_1ch_near_room1_A 7.57 -SimData_dt_for_1ch_near_room2_A 10.97 -SimData_dt_for_1ch_near_room3_A 12.59 -Avg_Real(2) 33.70 -Avg_Sim(6) 14.89 - -exp/tri2b_mc/decode_basis_fmllr_tg_5k_REVERB_*et* -RealData_et_for_1ch_far_room1_A 33.49 -RealData_et_for_1ch_near_room1_A 34.72 -SimData_et_for_1ch_far_room1_A 10.03 -SimData_et_for_1ch_far_room2_A 20.16 -SimData_et_for_1ch_far_room3_A 25.08 
-SimData_et_for_1ch_near_room1_A 8.45 -SimData_et_for_1ch_near_room2_A 11.16 -SimData_et_for_1ch_near_room3_A 12.88 -Avg_Real(2) 34.11 -Avg_Sim(6) 14.63 - -#################### -exp/tri2b_mc_mmi_b0.1/decode_basis_fmllr_tg_5k_REVERB_*dt* -RealData_dt_for_1ch_far_room1_A 31.17 -RealData_dt_for_1ch_near_room1_A 31.82 -SimData_dt_for_1ch_far_room1_A 8.53 -SimData_dt_for_1ch_far_room2_A 17.43 -SimData_dt_for_1ch_far_room3_A 21.04 -SimData_dt_for_1ch_near_room1_A 6.78 -SimData_dt_for_1ch_near_room2_A 8.97 -SimData_dt_for_1ch_near_room3_A 10.01 -Avg_Real(2) 31.50 -Avg_Sim(6) 12.13 - -exp/tri2b_mc_mmi_b0.1/decode_basis_fmllr_tg_5k_REVERB_*et* -RealData_et_for_1ch_far_room1_A 31.20 -RealData_et_for_1ch_near_room1_A 30.98 -SimData_et_for_1ch_far_room1_A 8.42 -SimData_et_for_1ch_far_room2_A 17.63 -SimData_et_for_1ch_far_room3_A 20.71 -SimData_et_for_1ch_near_room1_A 7.03 -SimData_et_for_1ch_near_room2_A 9.50 -SimData_et_for_1ch_near_room3_A 11.11 -Avg_Real(2) 31.09 -Avg_Sim(6) 12.40 - -#################### -exp/tri2b_mc_mmi_b0.1/decode_mbr_basis_fmllr_tg_5k_REVERB_*dt* -RealData_dt_for_1ch_far_room1_A 30.42 -RealData_dt_for_1ch_near_room1_A 31.50 -SimData_dt_for_1ch_far_room1_A 8.24 -SimData_dt_for_1ch_far_room2_A 17.25 -SimData_dt_for_1ch_far_room3_A 20.72 -SimData_dt_for_1ch_near_room1_A 6.76 -SimData_dt_for_1ch_near_room2_A 8.87 -SimData_dt_for_1ch_near_room3_A 9.92 -Avg_Real(2) 30.96 -Avg_Sim(6) 11.96 - -exp/tri2b_mc_mmi_b0.1/decode_mbr_basis_fmllr_tg_5k_REVERB_*et* -RealData_et_for_1ch_far_room1_A 30.89 -RealData_et_for_1ch_near_room1_A 31.01 -SimData_et_for_1ch_far_room1_A 8.20 -SimData_et_for_1ch_far_room2_A 17.34 -SimData_et_for_1ch_far_room3_A 20.56 -SimData_et_for_1ch_near_room1_A 6.91 -SimData_et_for_1ch_near_room2_A 9.50 -SimData_et_for_1ch_near_room3_A 10.93 -Avg_Real(2) 30.95 -Avg_Sim(6) 12.24 - +######################################## +GMM RESULTs: +exp/tri3/decode_dt_real_1ch +%WER 34.18 [ 500 / 1463, 24 ins, 125 del, 351 sub ] [PARTIAL] 
exp/tri3/decode_dt_real_1ch/wer_17_1.0_far_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_far_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_far_room3 +%WER 29.63 [ 475 / 1603, 24 ins, 127 del, 324 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_15_0.5_near_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_near_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_near_room3 + +exp/tri3/decode_dt_simu_1ch +%WER 6.78 [ 276 / 4071, 38 ins, 42 del, 196 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room1 +%WER 18.28 [ 742 / 4058, 65 ins, 155 del, 522 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_14_0.5_far_room2 +%WER 19.78 [ 800 / 4045, 77 ins, 148 del, 575 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_13_0.0_far_room3 +%WER 5.53 [ 225 / 4071, 36 ins, 29 del, 160 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_near_room1 +%WER 7.81 [ 317 / 4058, 48 ins, 37 del, 232 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_12_0.0_near_room2 +%WER 10.70 [ 433 / 4045, 47 ins, 86 del, 300 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_13_0.5_near_room3 + +exp/tri3/decode_et_real_1ch +%WER 33.09 [ 980 / 2962, 103 ins, 157 del, 720 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_far_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_far_room3 +%WER 33.18 [ 1039 / 3131, 104 ins, 194 del, 741 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_near_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_near_room3 + +exp/tri3/decode_et_simu_1ch +%WER 7.47 [ 441 / 5907, 73 ins, 48 del, 320 
sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room1 +%WER 18.31 [ 1140 / 6226, 128 ins, 191 del, 821 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_13_0.0_far_room2 +%WER 21.81 [ 1280 / 5868, 109 ins, 273 del, 898 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_14_0.5_far_room3 +%WER 7.26 [ 429 / 5907, 77 ins, 42 del, 310 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_16_1.0_near_room1 +%WER 9.52 [ 593 / 6226, 78 ins, 86 del, 429 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_13_0.5_near_room2 +%WER 11.96 [ 702 / 5868, 60 ins, 151 del, 491 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_13_1.0_near_room3 +######################################## +TDNN RESULTs: +exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* +%WER 19.62 [ 287 / 1463, 29 ins, 55 del, 203 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.0_far_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_far_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_far_room3 +%WER 17.65 [ 283 / 1603, 27 ins, 67 del, 189 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_12_0.0_near_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_near_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_near_room3 +%WER 3.29 [ 134 / 4071, 17 ins, 25 del, 92 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_far_room1 +%WER 7.02 [ 285 / 4058, 27 ins, 53 del, 205 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 +%WER 6.85 [ 277 / 4045, 20 ins, 54 del, 203 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_1.0_far_room3 
+%WER 2.73 [ 111 / 4071, 10 ins, 18 del, 83 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_near_room1 +%WER 3.45 [ 140 / 4058, 14 ins, 24 del, 102 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.5_near_room2 +%WER 3.93 [ 159 / 4045, 23 ins, 26 del, 110 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.0_near_room3 + +exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* +%WER 19.04 [ 564 / 2962, 53 ins, 94 del, 417 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_far_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_far_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_far_room3 +%WER 18.17 [ 569 / 3131, 48 ins, 123 del, 398 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_near_room1 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_near_room2 +%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_near_room3 +%WER 3.72 [ 220 / 5907, 24 ins, 41 del, 155 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room1 +%WER 7.40 [ 461 / 6226, 41 ins, 97 del, 323 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room2 +%WER 7.26 [ 426 / 5868, 33 ins, 101 del, 292 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 +%WER 3.18 [ 188 / 5907, 25 ins, 38 del, 125 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room1 +%WER 4.87 [ 303 / 6226, 31 ins, 64 del, 208 sub ] [PARTIAL] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 +%WER 4.84 [ 284 / 5868, 23 ins, 64 del, 197 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_10_1.0_near_room3 diff --git a/egs/reverb/s5/local/chain/run_tdnn_lstm.sh b/egs/reverb/s5/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/reverb/s5/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh index 61cc8b97d41..c96529044d4 100755 --- a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh @@ -234,6 +234,7 @@ if [ $stage -le 16 ]; then for data in $test_sets; do ( steps/nnet3/decode.sh \ + --stage 3 \ --acwt 1.0 --post-decode-acwt 10.0 \ --extra-left-context $chunk_left_context \ --extra-right-context $chunk_right_context \ diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index cca08d791fa..9369e00a7ba 100755 --- a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -7,15 +7,15 @@ set -euo pipefail # (some of which are also used in this script directly). stage=0 nj=96 -train_set=train_worn_u400k_cleaned -test_sets="dev_worn dev_beamformit_dereverb_ref" -gmm=tri3_cleaned -nnet3_affix=_train_worn_u400k_cleaned +train_set=tr_simu_8ch +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch" +gmm=tri3 +nnet3_affix=_tr_simu_8ch lm_suffix= # The rest are configs specific to this script. Most of the parameters # are just hardcoded at this level, in the commands below. 
-affix=_1a_aug # affix for the TDNN directory name +affix=1a # affix for the TDNN directory name tree_affix= train_stage=-10 get_egs_stage=-10 @@ -66,7 +66,7 @@ fi # The iVector-extraction and feature-dumping parts are the same as the standard # nnet3 setup, and you can skip them by setting "--stage 11" if you have already # run those things. -local/nnet3/run_ivector_common_old.sh --stage $stage \ +local/nnet3/run_ivector_common.sh --stage $stage \ --train-set $train_set \ --test-sets "$test_sets" \ --gmm $gmm \ diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh index 96eb43cb0a3..7eb632dc40f 100755 --- a/egs/reverb/s5/local/get_results.sh +++ b/egs/reverb/s5/local/get_results.sh @@ -1,18 +1,23 @@ #!/bin/bash -# Reproduce selected results in Table 1 from Weninger et al. (2014) # "Our baselines" - -# LDA-STC fMLLR MCT DT LM MBR -# No No No No BG No -local/calc_wer.sh -# No No Yes No BG No -#local/calc_wer.sh --am tri2a_mc -# No Yes Yes No BG No -#local/calc_wer.sh --am tri2a_mc --decode basis_fmllr -# Yes Yes Yes No TG No -#local/calc_wer.sh --am tri2b_mc --lm tg_5k --decode basis_fmllr -# Yes Yes Yes Yes TG No -#local/calc_wer.sh --am tri2b_mc_mmi_b0.1 --lm tg_5k --decode basis_fmllr -# Yes Yes Yes Yes TG Yes -local/calc_wer.sh --am tri3 --lm tg_5k --decode +echo "########################################" +echo "GMM RESULTs:" +echo "exp/tri3/decode_dt_real_1ch" +cat exp/tri3/decode_dt_real_1ch/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_1ch" +cat exp/tri3/decode_dt_simu_1ch/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_1ch" +cat exp/tri3/decode_et_real_1ch/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_simu_1ch" +cat exp/tri3/decode_et_simu_1ch/scoring_kaldi/best_wer* +echo "########################################" +echo "TDNN RESULTs:" +echo "exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt*" +cat exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt*/scoring_kaldi/best_wer_* 
+echo "" +echo "exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et*" +cat exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et*/scoring_kaldi/best_wer_* diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh index 9988c941441..55f7f337481 100755 --- a/egs/reverb/s5/local/score.sh +++ b/egs/reverb/s5/local/score.sh @@ -8,7 +8,7 @@ # begin configuration section. cmd=run.pl -stage=0 +stage=1 decode_mbr=false stats=true beam=6 @@ -61,6 +61,10 @@ fi mkdir -p $dir/scoring_kaldi cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; +tasks="\ +near_room1 far_room1 \ +near_room2 far_room2 \ +near_room3 far_room3" if [ $stage -le 0 ]; then for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do @@ -85,57 +89,57 @@ if [ $stage -le 0 ]; then utils/int2sym.pl -f 2- $symtab \| \ $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; fi - - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ - cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ - compute-wer --text --mode=present \ - ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; - + for task in ${tasks}; do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ + grep $task $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_${wip}_${task} || exit 1; + done done fi if [ $stage -le 1 ]; then + for task in ${tasks}; do + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for lmwt in $(seq $min_lmwt $max_lmwt); do + # adding /dev/null to the command list below forces grep to output the filename + grep WER $dir/wer_${lmwt}_${wip}_${task} /dev/null + done + done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer_${task} || exit 1 + + best_wer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_wer_${task}) + best_wip=$(echo $best_wer_file | awk -F_ '{N=NF-2; print $N}') + 
best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-3; print $N}') + + if [ -z "$best_lmwt" ]; then + echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong." + exit 1; + fi + if $stats; then + mkdir -p $dir/scoring_kaldi/wer_details + echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight + echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty + + $cmd $dir/scoring_kaldi/log/stats1.log \ + cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ + align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ + utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ + utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; + + $cmd $dir/scoring_kaldi/log/stats2.log \ + cat $dir/scoring_kaldi/wer_details/per_utt \| \ + utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \ + sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1; + + $cmd $dir/scoring_kaldi/log/wer_bootci.log \ + compute-wer-bootci --mode=present \ + ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ + '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; - for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do - for lmwt in $(seq $min_lmwt $max_lmwt); do - # adding /dev/null to the command list below forces grep to output the filename - grep WER $dir/wer_${lmwt}_${wip} /dev/null - done - done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer || exit 1 - - best_wer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_wer) - best_wip=$(echo $best_wer_file | awk -F_ '{print $NF}') - best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-1; print $N}') - - if [ -z "$best_lmwt" ]; then - echo "$0: we could not get the details of the best WER from the file $dir/wer_*. 
Probably something went wrong." - exit 1; - fi - - if $stats; then - mkdir -p $dir/scoring_kaldi/wer_details - echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight - echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty - - $cmd $dir/scoring_kaldi/log/stats1.log \ - cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ - align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ - utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ - utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; - - $cmd $dir/scoring_kaldi/log/stats2.log \ - cat $dir/scoring_kaldi/wer_details/per_utt \| \ - utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \ - sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1; - - $cmd $dir/scoring_kaldi/log/wer_bootci.log \ - compute-wer-bootci --mode=present \ - ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ - '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; - - fi + fi + done fi # If we got here, the scoring was successful. diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 7db9aec1599..2b17218581b 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -1,6 +1,7 @@ #!/bin/bash # Copyright 2013-2014 MERL (author: Felix Weninger and Shinji Watanabe) +# JHU CLSP (Szu-Jui Chen) # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -59,21 +60,13 @@ fi train_set=tr_simu_8ch test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch" -# LDA context size (left/right) (4 is default) -context_size=4 - # The language models with which to decode (tg_5k or bg_5k) lm="tg_5k" # number of jobs for feature extraction and model training nj=92 -decode_nj=10 - # number of jobs for decoding -nj_decode=8 - -# set to true if you want the tri2a systems (re-implementation of the HTK baselines) -do_tri2a=true +decode_nj=10 if [ ${stage} -le 1 ]; then ### Task dependent. You have to make the following data preparation part by yourself. @@ -93,7 +86,6 @@ if [ $stage -le 2 ]; then local/wsj_prepare_beep_dict.sh # Prepare wordlists, etc. - utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang # Prepare directory structure for clean data. Apply some language model fixes. @@ -108,27 +100,6 @@ if [ $stage -le 2 ]; then # Compiles G for reverb 3-gram LM utils/format_lm.sh \ data/lang $LM data/local/dict/lexicon.txt data/lang - - # Now it's getting more interesting. - # Prepare the multi-condition training data and the REVERB dt set. - # This also extracts MFCC features (!!!) - # This creates the data sets called REVERB_tr_cut and REVERB_dt. - # If you have processed waveforms, this is a good starting point to integrate them. - # For example, you could have something like - # local/REVERB_wsjcam0_data_prep.sh /path/to/processed/REVERB_WSJCAM0_dt processed_REVERB_dt dt - # The first argument is supposed to point to a folder that has the same structure - # as the REVERB corpus. - # local/REVERB_wsjcam0_data_prep.sh $reverb_tr REVERB_tr_cut tr - # local/REVERB_wsjcam0_data_prep.sh $reverb_dt REVERB_dt dt - # local/REVERB_wsjcam0_data_prep.sh $reverb_et REVERB_et et - - # # Prepare the REVERB "real" dt set from MCWSJAV corpus. - # # This corpus is *never* used for training. 
- # # This creates the data set called REVERB_Real_dt and its subfolders - # local/REVERB_mcwsjav_data_prep.sh $reverb_real_dt REVERB_Real_dt dt - # # The MLF file exists only once in the corpus, namely in the real_dt directory - # # so we pass it as 4th argument - # local/REVERB_mcwsjav_data_prep.sh $reverb_real_et REVERB_Real_et et $reverb_real_dt/mlf/WSJ.mlf fi if [ $stage -le 3 ]; then @@ -139,10 +110,8 @@ if [ $stage -le 3 ]; then fi if [ $stage -le 4 ]; then - # Extract MFCC features for clean sets. - # For the non-clean data sets, this is outsourced to the data preparation scripts. + # Extract MFCC features for train and test sets. mfccdir=mfcc - ### for x in si_tr si_dt; do it seems that the number of transcriptions of si_dt is not correct. for x in ${train_set} ${test_sets}; do steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 \ data/$x exp/make_mfcc/$x $mfccdir @@ -150,12 +119,6 @@ if [ $stage -le 4 ]; then done fi -# if [ $stage -le 5 ]; then - # make a subset for monophone training - #utils/subset_data_dir.sh --shortest data/${train_set} 30000 data/${train_set}_30kshort - #utils/subset_data_dir.sh data/${train_set}_10kshort 4000 data/${train_set}_4kshort -# fi - if [ $stage -le 6 ]; then # Starting basic training on MFCC features steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ @@ -209,78 +172,8 @@ if [ $stage -le 12 ]; then local/chain/run_tdnn.sh --nj ${nj} --stage 16 --train-set ${train_set} --test-sets "$test_sets" --gmm tri3 --nnet3-affix _${train_set} \ --lm-suffix _test_$lm fi -exit 1 -# # decoding using various recognizers -# if [ $stage -le 16 ]; then - # # put tri2b last since it takes longest due to the large mismatch. 
- # for recog in tri2b_mc tri2b_mc_mmi_b0.1 tri2b; do - # # The graph from the ML directory is used in recipe - # recog2=`echo $recog | sed s/_mmi.*//` - # graph=exp/$recog2/graph_$lm - - # echo "### DECODING with $recog, noadapt, $lm ###" - # for dataset in data/REVERB_*{dt,et}/*; do - # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - # steps/decode.sh --nj $nj_decode --cmd "$decode_cmd" \ - # $graph $dataset \ - # exp/$recog/decode_$decode_suff & - # done - # wait - # echo " ## MBR RESCORING with $recog, noadapt ##" - # for dataset in data/REVERB_*{dt,et}/*; do - # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - # mkdir -p exp/$recog/decode_mbr_$decode_suff - # cp exp/$recog/decode_$decode_suff/lat.*.gz exp/$recog/decode_mbr_$decode_suff - # local/score_mbr.sh --cmd "$decode_cmd" \ - # $dataset data/lang_test_$lm/ exp/$recog/decode_mbr_$decode_suff & - # done - # wait - - # done # loop recog -# fi - -# # decoding using various recognizers with adaptation -# if [ $stage -le 11 ]; then - # # put tri2b last since it takes longest due to the large mismatch. 
- # for recog in tri2b_mc tri2b_mc_mmi_b0.1 tri2b; do - # # The graph from the ML directory is used in recipe - # recog2=`echo $recog | sed s/_mmi.*//` - # graph=exp/$recog2/graph_$lm - - # # set the adaptation data - # if [[ "$recog" =~ _mc ]]; then - # tr_dataset=REVERB_tr_cut/SimData_tr_for_1ch_A - # else - # tr_dataset=si_tr - # fi - - # echo "### DECODING with $recog, basis_fmllr, $lm ###" - # steps/get_fmllr_basis.sh --cmd "$train_cmd" --per-utt true data/$tr_dataset data/lang exp/$recog - # for dataset in data/REVERB_*{dt,et}/*; do - # ( - # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - # steps/decode_basis_fmllr.sh --nj $nj_decode --cmd "$decode_cmd" \ - # $graph $dataset \ - # exp/$recog/decode_basis_fmllr_$decode_suff - # ) & - # done - # wait - - # echo " ## MBR RESCORING with $recog, basis_fmllr ##" - # for dataset in data/REVERB_*{dt,et}/*; do - # decode_suff=${lm}_`echo $dataset | awk -F '/' '{print $2 "_" $3}'` - # mkdir -p exp/$recog/decode_mbr_basis_fmllr_$decode_suff - # cp exp/$recog/decode_basis_fmllr_$decode_suff/lat.*.gz exp/$recog/decode_mbr_basis_fmllr_$decode_suff - # local/score_mbr.sh --cmd "$decode_cmd" \ - # $dataset data/lang_test_$lm/ exp/$recog/decode_mbr_basis_fmllr_$decode_suff & - # done - # wait - - # done # loop recog -# fi - -# get all WERs with lmw=15 -if [ $stage -le 12 ]; then +# get all WERs. 
+if [ $stage -le 13 ]; then local/get_results.sh fi From 53fc524026770b537dc58582814f93f57d8ec110 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Tue, 2 Oct 2018 16:55:42 -0400 Subject: [PATCH 05/39] minor modification --- egs/reverb/s5/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 2b17218581b..458c96f85ab 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright 2013-2014 MERL (author: Felix Weninger and Shinji Watanabe) -# JHU CLSP (Szu-Jui Chen) +# Johns Hopkins University (author: Szu-Jui Chen) # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 3a648fc75a2c55d9a2c385077f4b12060bcc94cb Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Tue, 2 Oct 2018 17:20:12 -0400 Subject: [PATCH 06/39] minor modification --- egs/chime5/s5/path.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/egs/chime5/s5/path.sh b/egs/chime5/s5/path.sh index 6fc01352d90..fb1c0489386 100644 --- a/egs/chime5/s5/path.sh +++ b/egs/chime5/s5/path.sh @@ -1,6 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -[ -f $KALDI_ROOT/tools/extras/env.sh ] && . $KALDI_ROOT/tools/extras/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh From 2b112e84d5574fe0969675df95c0b56352f22ce1 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Wed, 3 Oct 2018 02:15:48 -0400 Subject: [PATCH 07/39] parameter update --- egs/reverb/s5/local/score.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh index 55f7f337481..f80c444cd50 100755 --- a/egs/reverb/s5/local/score.sh +++ b/egs/reverb/s5/local/score.sh @@ -8,7 +8,7 @@ # begin configuration section. cmd=run.pl -stage=1 +stage=0 decode_mbr=false stats=true beam=6 From 8145d3c2ed6c76debd605b90da24f626212e8ede Mon Sep 17 00:00:00 2001 From: Szu-JuiChen <31828751+Szu-JuiChen@users.noreply.github.com> Date: Wed, 3 Oct 2018 09:19:47 -0400 Subject: [PATCH 08/39] Update run.sh --- egs/reverb/s5/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 458c96f85ab..4ebc0e05b0b 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -169,7 +169,7 @@ fi if [ $stage -le 12 ]; then # chain TDNN - local/chain/run_tdnn.sh --nj ${nj} --stage 16 --train-set ${train_set} --test-sets "$test_sets" --gmm tri3 --nnet3-affix _${train_set} \ + local/chain/run_tdnn.sh --nj ${nj} --train-set ${train_set} --test-sets "$test_sets" --gmm tri3 --nnet3-affix _${train_set} \ --lm-suffix _test_$lm fi From 37960b24134191d240a968fff9c2e46e728c2442 Mon Sep 17 00:00:00 2001 From: Szu-JuiChen <31828751+Szu-JuiChen@users.noreply.github.com> Date: Wed, 3 Oct 2018 09:30:08 -0400 Subject: [PATCH 09/39] Update run_tdnn_1a.sh --- egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh index c96529044d4..61cc8b97d41 100755 --- a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh @@ -234,7 +234,6 @@ if [ $stage -le 16 ]; then for data 
in $test_sets; do ( steps/nnet3/decode.sh \ - --stage 3 \ --acwt 1.0 --post-decode-acwt 10.0 \ --extra-left-context $chunk_left_context \ --extra-right-context $chunk_right_context \ From cb026a1615fcab203d3d40248c9ce18842c7e6f5 Mon Sep 17 00:00:00 2001 From: Szu-JuiChen <31828751+Szu-JuiChen@users.noreply.github.com> Date: Wed, 3 Oct 2018 13:56:09 -0400 Subject: [PATCH 10/39] Update run_ivector_common.sh --- egs/reverb/s5/local/nnet3/run_ivector_common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/local/nnet3/run_ivector_common.sh b/egs/reverb/s5/local/nnet3/run_ivector_common.sh index e28e5ce996d..422790ff927 100755 --- a/egs/reverb/s5/local/nnet3/run_ivector_common.sh +++ b/egs/reverb/s5/local/nnet3/run_ivector_common.sh @@ -140,7 +140,7 @@ if [ $stage -le 6 ]; then # Also extract iVectors for the test data, but in this case we don't need the speed # perturbation (sp). for data in $test_sets; do - steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \ + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 10 \ data/${data}_hires exp/nnet3${nnet3_affix}/extractor \ exp/nnet3${nnet3_affix}/ivectors_${data}_hires done From e775685ee02c77d93744595c7ea31ece40f2cbf5 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Thu, 4 Oct 2018 00:43:01 -0400 Subject: [PATCH 11/39] fix nan value issue --- egs/reverb/s5/RESULTS | 16 ---------------- egs/reverb/s5/local/score.sh | 15 +++++++++++---- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS index 589740c1507..8b26eecdd2a 100644 --- a/egs/reverb/s5/RESULTS +++ b/egs/reverb/s5/RESULTS @@ -2,11 +2,7 @@ GMM RESULTs: exp/tri3/decode_dt_real_1ch %WER 34.18 [ 500 / 1463, 24 ins, 125 del, 351 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_17_1.0_far_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_far_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] 
[PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_far_room3 %WER 29.63 [ 475 / 1603, 24 ins, 127 del, 324 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_15_0.5_near_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_near_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_7_0.0_near_room3 exp/tri3/decode_dt_simu_1ch %WER 6.78 [ 276 / 4071, 38 ins, 42 del, 196 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room1 @@ -18,11 +14,7 @@ exp/tri3/decode_dt_simu_1ch exp/tri3/decode_et_real_1ch %WER 33.09 [ 980 / 2962, 103 ins, 157 del, 720 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_far_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_far_room3 %WER 33.18 [ 1039 / 3131, 104 ins, 194 del, 741 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_near_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_7_0.0_near_room3 exp/tri3/decode_et_simu_1ch %WER 7.47 [ 441 / 5907, 73 ins, 48 del, 320 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room1 @@ -35,11 +27,7 @@ exp/tri3/decode_et_simu_1ch TDNN RESULTs: exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* %WER 19.62 [ 287 / 1463, 29 ins, 55 del, 203 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.0_far_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_far_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_far_room3 %WER 17.65 [ 283 / 1603, 27 ins, 67 del, 189 sub ] [PARTIAL] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_12_0.0_near_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_near_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_7_0.0_near_room3 %WER 3.29 [ 134 / 4071, 17 ins, 25 del, 92 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_far_room1 %WER 7.02 [ 285 / 4058, 27 ins, 53 del, 205 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 %WER 6.85 [ 277 / 4045, 20 ins, 54 del, 203 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_1.0_far_room3 @@ -49,11 +37,7 @@ exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* %WER 19.04 [ 564 / 2962, 53 ins, 94 del, 417 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_far_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_far_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_far_room3 %WER 18.17 [ 569 / 3131, 48 ins, 123 del, 398 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_near_room1 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_near_room2 -%WER -nan [ 0 / 0, 0 ins, 0 del, 0 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_7_0.0_near_room3 %WER 3.72 [ 220 / 5907, 24 ins, 41 del, 155 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room1 %WER 7.40 [ 461 / 6226, 41 ins, 97 del, 323 sub ] [PARTIAL] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room2 %WER 7.26 [ 426 / 5868, 33 ins, 101 del, 292 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh index f80c444cd50..a6ec05e3a43 100755 --- a/egs/reverb/s5/local/score.sh +++ b/egs/reverb/s5/local/score.sh @@ -61,10 +61,17 @@ fi mkdir -p $dir/scoring_kaldi cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; -tasks="\ -near_room1 far_room1 \ -near_room2 far_room2 \ -near_room3 far_room3" + +if echo $data | grep -q "real"; then + tasks="\ + near_room1 far_room1" +else + tasks="\ + near_room1 far_room1 \ + near_room2 far_room2 \ + near_room3 far_room3" +fi + if [ $stage -le 0 ]; then for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do From 6d04afcca861f3716c098abc40998455c5c964f8 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Thu, 4 Oct 2018 07:46:34 -0400 Subject: [PATCH 12/39] Added WPE --- egs/reverb/s5/local/prepare_real_data.sh | 9 +- egs/reverb/s5/local/prepare_simu_data.sh | 14 +- egs/reverb/s5/local/run_wpe.py | 49 +++++++ egs/reverb/s5/local/run_wpe.sh | 170 +++++++++++++++++++++++ egs/reverb/s5/run.sh | 22 +-- tools/extras/install_wpe.sh | 15 ++ 6 files changed, 268 insertions(+), 11 deletions(-) create mode 100644 egs/reverb/s5/local/run_wpe.py create mode 100755 egs/reverb/s5/local/run_wpe.sh create mode 100755 tools/extras/install_wpe.sh diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh index d7d16ba07bf..d7019697fc4 100755 --- a/egs/reverb/s5/local/prepare_real_data.sh +++ b/egs/reverb/s5/local/prepare_real_data.sh @@ -6,6 +6,7 @@ # https://github.com/kaldi-asr/kaldi/tree/master/egs/reverb/s5/local # Begin configuration section. +wavdir=${PWD}/wav # End configuration section . ./utils/parse_options.sh # accept options.. 
you can run this run.sh with the @@ -79,13 +80,19 @@ for nch in 1 2 8; do for task in dt et; do if [ ${task} == 'dt' ]; then audiodir=${reverb}/MC_WSJ_AV_Dev + audiodir_wpe=${wavdir}/WPE/MC_WSJ_AV_Dev elif [ ${task} == 'et' ]; then audiodir=${reverb}/MC_WSJ_AV_Eval + audiodir_wpe=${wavdir}/WPE/MC_WSJ_AV_Eval fi for x in `ls ${taskdir} | grep RealData | grep _${task}_`; do perl -se 'while(<>){m:^\S+/[\w\-]*_(T\w{6,7})\.wav$: || die "Bad line $_"; $id = lc $1; print "$id $dir$_";}' -- -dir=${audiodir} ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_real_${nch}ch_wav.scp + for x in `ls ${taskdir} | grep RealData | grep _${task}_`; do + perl -se 'while(<>){m:^\S+/[\w\-]*_(T\w{6,7})\.wav$: || die "Bad line $_"; $id = lc $1; print "$id $dir$_";}' -- -dir=${audiodir_wpe} ${taskdir}/$x |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_real_${nch}ch_wpe_wav.scp done # make a transcript for task in dt et; do @@ -109,7 +116,7 @@ for nch in 1 2 8; do for task in dt et; do datadir=data/${task}_real_${nch}ch mkdir -p ${datadir} - sort ${dir}/${task}_real_${nch}ch_wav.scp > ${datadir}/wav.scp + sort ${dir}/${task}_real_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp sort ${dir}/${task}_real_${nch}ch.txt > ${datadir}/text sort ${dir}/${task}_real_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_real_${nch}ch.spk2utt > ${datadir}/spk2utt diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index 08eec3a038c..1047e6b5cca 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -55,6 +55,18 @@ for nch in 1 2 8; do done > ${dir}/${task}_simu_${nch}ch_wav.scp done + task=tr + for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > 
${dir}/${task}_simu_${nch}ch_wpe_wav.scp + for task in dt et; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_simu_${nch}ch_wpe_wav.scp + done + # make a transcript for task in tr dt et; do for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do @@ -77,7 +89,7 @@ for nch in 1 2 8; do for task in tr dt et; do datadir=data/${task}_simu_${nch}ch mkdir -p ${datadir} - sort ${dir}/${task}_simu_${nch}ch_wav.scp > ${datadir}/wav.scp + sort ${dir}/${task}_simu_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp sort ${dir}/${task}_simu_${nch}ch.txt > ${datadir}/text sort ${dir}/${task}_simu_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_simu_${nch}ch.spk2utt > ${datadir}/spk2utt diff --git a/egs/reverb/s5/local/run_wpe.py b/egs/reverb/s5/local/run_wpe.py new file mode 100644 index 00000000000..dc8c40878f4 --- /dev/null +++ b/egs/reverb/s5/local/run_wpe.py @@ -0,0 +1,49 @@ +import numpy as np +import soundfile as sf +import time +import os, errno +from tqdm import tqdm +import argparse + +from nara_wpe.wpe import wpe +from nara_wpe.utils import stft, istft +from nara_wpe import project_root + +parser = argparse.ArgumentParser() +parser.add_argument('--files', '-f', nargs='+') +args = parser.parse_args() + +input_files = args.files[:len(args.files)//2] +output_files = args.files[len(args.files)//2:] +out_dir = os.path.dirname(output_files[0]) +try: + os.makedirs(out_dir) +except OSError as e: + if e.errno != errno.EEXIST: + raise + +stft_options = dict( + size=512, + shift=128, + window_length=None, + fading=True, + pad=True, + symmetric_window=False +) + +sampling_rate = 16000 +delay = 3 +iterations = 5 +taps = 10 + +signal_list = [ + sf.read(f)[0] + for f in input_files +] +y = np.stack(signal_list, axis=0) +Y = stft(y, 
**stft_options).transpose(2, 0, 1) +Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0) +z = istft(Z, size=stft_options['size'], shift=stft_options['shift']) + +for d in range(len(signal_list)): + sf.write(output_files[d], z[d,:], sampling_rate) diff --git a/egs/reverb/s5/local/run_wpe.sh b/egs/reverb/s5/local/run_wpe.sh new file mode 100755 index 00000000000..1203469abd4 --- /dev/null +++ b/egs/reverb/s5/local/run_wpe.sh @@ -0,0 +1,170 @@ +#!/bin/bash + +. ./cmd.sh +. ./path.sh + +# Config: +nj=20 +cmd=run.pl + +. utils/parse_options.sh || exit 1; + +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +miniconda_dir=$HOME/miniconda3/ +if [ ! -d $miniconda_dir ]; then + echo "$miniconda_dir does not exist. Please run '../../../tools/extras/install_miniconda.sh' and '../../../tools/extras/install_wpe.sh';" +fi + +# check if WPE is installed +result=`$HOME/miniconda3/bin/python -c "\ +try: + import nara_wpe + print('1') +except ImportError: + print('0')"` + +if [ "$result" == "1" ]; then + echo "WPE is installed" +else + echo "WPE is not installed. 
Please run ../../../tools/extras/install_wpe.sh" +fi + +dir=${PWD}/data/local/data + +for task in dt et; do + for nch in 1 2 8; do + wdir=exp/wpe_real_${task}_${nch}ch + mkdir -p $wdir/log + arrays=$wdir/channels + output_wavfiles=$wdir/wavfiles.list + if [ ${nch} == 1 ]; then + allwavs=`cat ${dir}/${task}_real_1ch_wav.scp | cut -d " " -f2` + allwavs_output=`cat ${dir}/${task}_real_1ch_wpe_wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' > $wdir/channels_input + echo $allwavs_output | tr ' ' '\n' > $wdir/channels_output + paste -d" " $wdir/channels_input $wdir/channels_output > $arrays + elif [ ${nch} == 2 ]; then + allwavs=`cat ${dir}/${task}_real_2ch_wav.scp | cut -d " " -f2` + allwavs_output=`cat ${dir}/${task}_real_2ch_wpe_wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==1' > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==0' > $wdir/channels.2nd + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==1' > $wdir/channels_output.1st + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==0' > $wdir/channels_output.2nd + paste -d" " $wdir/channels.1st $wdir/channels.2nd $wdir/channels_output.1st $wdir/channels_output.2nd > $arrays + elif [ ${nch} == 8 ]; then + allwavs=`cat ${dir}/${task}_real_8ch_wav.scp | cut -d " " -f2` + allwavs_output=`cat ${dir}/${task}_real_8ch_wpe_wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==1' > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==2' > $wdir/channels.2nd + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==3' > $wdir/channels.3rd + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==4' > $wdir/channels.4th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==5' > $wdir/channels.5th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==6' > $wdir/channels.6th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 
'NR%8==7' > $wdir/channels.7th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==0' > $wdir/channels.8th + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==1' > $wdir/channels_output.1st + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==2' > $wdir/channels_output.2nd + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==3' > $wdir/channels_output.3rd + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==4' > $wdir/channels_output.4th + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==5' > $wdir/channels_output.5th + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==6' > $wdir/channels_output.6th + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==7' > $wdir/channels_output.7th + echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==0' > $wdir/channels_output.8th + paste -d" " $wdir/channels.1st $wdir/channels.2nd $wdir/channels.3rd $wdir/channels.4th $wdir/channels.5th $wdir/channels.6th $wdir/channels.7th $wdir/channels.8th $wdir/channels_output.1st $wdir/channels_output.2nd $wdir/channels_output.3rd $wdir/channels_output.4th $wdir/channels_output.5th $wdir/channels_output.6th $wdir/channels_output.7th $wdir/channels_output.8th > $arrays + fi + + # split the list for parallel processing + split_wavfiles="" + for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" + done + utils/split_scp.pl $arrays $split_wavfiles || exit 1; + + echo -e "Dereverberation - $task - real - $nch ch\n" + # making a shell script for each job + for n in `seq $nj`; do + cat <<-EOF > $wdir/log/wpe.$n.sh + while read line; do + $HOME/miniconda3/bin/python local/run_wpe.py \ + --file \$line + done < $output_wavfiles.$n + EOF + done + + chmod a+x $wdir/log/wpe.*.sh + $cmd JOB=1:$nj $wdir/log/wpe.JOB.log \ + $wdir/log/wpe.JOB.sh + done +done + +for task in tr dt et; do + for nch in 1 2 8; do + wdir=exp/wpe_simu_${task}_${nch}ch + 
mkdir -p $wdir/log + arrays=$wdir/channels + output_wavfiles=$wdir/wavfiles.list + if [ ${nch} == 1 ]; then + allwavs=`cat ${dir}/${task}_simu_1ch_wav.scp | cut -d " " -f2` + allwavs_output=`cat ${dir}/${task}_simu_1ch_wpe_wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' > $wdir/channels_input + echo $allwavs_output | tr ' ' '\n' > $wdir/channels_output + paste -d" " $wdir/channels_input $wdir/channels_output > $arrays + elif [ ${nch} == 2 ]; then + allwavs=`cat ${dir}/${task}_simu_2ch_wav.scp | cut -d " " -f2` + allwavs_output=`cat ${dir}/${task}_simu_2ch_wpe_wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | grep 'ch1' | sort > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | grep 'ch2' | sort > $wdir/channels.2nd + echo $allwavs_output | tr ' ' '\n' | grep 'ch1' | sort > $wdir/channels_output.1st + echo $allwavs_output | tr ' ' '\n' | grep 'ch2' | sort > $wdir/channels_output.2nd + paste -d" " $wdir/channels.1st $wdir/channels.2nd $wdir/channels_output.1st $wdir/channels_output.2nd > $arrays + elif [ ${nch} == 8 ]; then + allwavs=`cat ${dir}/${task}_simu_8ch_wav.scp | cut -d " " -f2` + allwavs_output=`cat ${dir}/${task}_simu_8ch_wpe_wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | grep 'ch1' | sort > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | grep 'ch2' | sort > $wdir/channels.2nd + echo $allwavs | tr ' ' '\n' | grep 'ch3' | sort > $wdir/channels.3rd + echo $allwavs | tr ' ' '\n' | grep 'ch4' | sort > $wdir/channels.4th + echo $allwavs | tr ' ' '\n' | grep 'ch5' | sort > $wdir/channels.5th + echo $allwavs | tr ' ' '\n' | grep 'ch6' | sort > $wdir/channels.6th + echo $allwavs | tr ' ' '\n' | grep 'ch7' | sort > $wdir/channels.7th + echo $allwavs | tr ' ' '\n' | grep 'ch8' | sort > $wdir/channels.8th + echo $allwavs_output | tr ' ' '\n' | grep 'ch1' | sort > $wdir/channels_output.1st + echo $allwavs_output | tr ' ' '\n' | grep 'ch2' | sort > $wdir/channels_output.2nd + echo $allwavs_output | tr ' ' '\n' | grep 'ch3' | sort > 
$wdir/channels_output.3rd + echo $allwavs_output | tr ' ' '\n' | grep 'ch4' | sort > $wdir/channels_output.4th + echo $allwavs_output | tr ' ' '\n' | grep 'ch5' | sort > $wdir/channels_output.5th + echo $allwavs_output | tr ' ' '\n' | grep 'ch6' | sort > $wdir/channels_output.6th + echo $allwavs_output | tr ' ' '\n' | grep 'ch7' | sort > $wdir/channels_output.7th + echo $allwavs_output | tr ' ' '\n' | grep 'ch8' | sort > $wdir/channels_output.8th + paste -d" " $wdir/channels.1st $wdir/channels.2nd $wdir/channels.3rd $wdir/channels.4th $wdir/channels.5th $wdir/channels.6th $wdir/channels.7th $wdir/channels.8th $wdir/channels_output.1st $wdir/channels_output.2nd $wdir/channels_output.3rd $wdir/channels_output.4th $wdir/channels_output.5th $wdir/channels_output.6th $wdir/channels_output.7th $wdir/channels_output.8th > $arrays + fi + + # split the list for parallel processing + split_wavfiles="" + for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" + done + utils/split_scp.pl $arrays $split_wavfiles || exit 1; + + echo -e "Dereverberation - $task - simu - $nch ch\n" + # making a shell script for each job + for n in `seq $nj`; do + cat <<-EOF > $wdir/log/wpe.$n.sh + while read line; do + $HOME/miniconda3/bin/python local/run_wpe.py \ + --file \$line + done < $output_wavfiles.$n + EOF + done + + chmod a+x $wdir/log/wpe.*.sh + $cmd JOB=1:$nj $wdir/log/wpe.JOB.log \ + $wdir/log/wpe.JOB.sh + done +done +echo "`basename $0` Done." diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 4ebc0e05b0b..99661320fe3 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -69,16 +69,20 @@ nj=92 decode_nj=10 if [ ${stage} -le 1 ]; then - ### Task dependent. You have to make the following data preparation part by yourself. 
- ### But you can utilize Kaldi recipes in most cases - wavdir=$PWD/wav - echo "stage 0: Data preparation" - local/generate_data.sh --wavdir ${wavdir} ${wsjcam0} - local/prepare_simu_data.sh --wavdir ${wavdir} ${reverb} ${wsjcam0} - local/prepare_real_data.sh ${reverb} + ### Task dependent. You have to make the following data preparation part by yourself. + ### But you can utilize Kaldi recipes in most cases + wavdir=$PWD/wav + echo "stage 0: Data preparation" + local/generate_data.sh --wavdir ${wavdir} ${wsjcam0} + local/prepare_simu_data.sh --wavdir ${wavdir} ${reverb} ${wsjcam0} + local/prepare_real_data.sh --wavdir ${wavdir} ${reverb} fi if [ $stage -le 2 ]; then + local/run_wpe.sh +fi + +if [ $stage -le 3 ]; then # Prepare wsjcam0 clean data and wsj0 language model. local/wsjcam0_data_prep.sh $wsjcam0 $wsj0 @@ -102,14 +106,14 @@ if [ $stage -le 2 ]; then data/lang $LM data/local/dict/lexicon.txt data/lang fi -if [ $stage -le 3 ]; then +if [ $stage -le 4 ]; then for dset in ${train_set} ${test_sets}; do utils/copy_data_dir.sh data/${dset} data/${dset}_nosplit utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_nosplit data/${dset} done fi -if [ $stage -le 4 ]; then +if [ $stage -le 5 ]; then # Extract MFCC features for train and test sets. mfccdir=mfcc for x in ${train_set} ${test_sets}; do diff --git a/tools/extras/install_wpe.sh b/tools/extras/install_wpe.sh new file mode 100755 index 00000000000..4d129fc6db7 --- /dev/null +++ b/tools/extras/install_wpe.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Installs nara-wpe with dependencies +# miniconda should be installed in $HOME/miniconda3/ + +miniconda_dir=$HOME/miniconda3/ + +if [ ! -d $miniconda_dir ]; then + echo "$miniconda_dir does not exist. Please run 'tools/extras/install_miniconda.sh" && exit 1; +fi + +$HOME/miniconda3/bin/python -m pip install soundfile +git clone https://github.com/fgnt/nara_wpe.git +cd nara_wpe +$HOME/miniconda3/bin/python -m pip install --editable . 
From c9e5f6a7613128b8df1c1e7e982e1656c93fb12f Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Sat, 6 Oct 2018 12:37:47 -0400 Subject: [PATCH 13/39] Remove partial tag --- egs/reverb/s5/RESULTS | 64 ++++++++++++++++++------------------ egs/reverb/s5/local/score.sh | 11 ++++--- egs/reverb/s5/run.sh | 3 +- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS index 8b26eecdd2a..2bfda91a6a9 100644 --- a/egs/reverb/s5/RESULTS +++ b/egs/reverb/s5/RESULTS @@ -1,46 +1,46 @@ ######################################## GMM RESULTs: exp/tri3/decode_dt_real_1ch -%WER 34.18 [ 500 / 1463, 24 ins, 125 del, 351 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_17_1.0_far_room1 -%WER 29.63 [ 475 / 1603, 24 ins, 127 del, 324 sub ] [PARTIAL] exp/tri3/decode_dt_real_1ch/wer_15_0.5_near_room1 +%WER 34.18 [ 500 / 1463, 24 ins, 125 del, 351 sub ] exp/tri3/decode_dt_real_1ch/wer_17_1.0_far_room1 +%WER 29.63 [ 475 / 1603, 24 ins, 127 del, 324 sub ] exp/tri3/decode_dt_real_1ch/wer_15_0.5_near_room1 exp/tri3/decode_dt_simu_1ch -%WER 6.78 [ 276 / 4071, 38 ins, 42 del, 196 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room1 -%WER 18.28 [ 742 / 4058, 65 ins, 155 del, 522 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_14_0.5_far_room2 -%WER 19.78 [ 800 / 4045, 77 ins, 148 del, 575 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_13_0.0_far_room3 -%WER 5.53 [ 225 / 4071, 36 ins, 29 del, 160 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_near_room1 -%WER 7.81 [ 317 / 4058, 48 ins, 37 del, 232 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_12_0.0_near_room2 -%WER 10.70 [ 433 / 4045, 47 ins, 86 del, 300 sub ] [PARTIAL] exp/tri3/decode_dt_simu_1ch/wer_13_0.5_near_room3 +%WER 6.78 [ 276 / 4071, 38 ins, 42 del, 196 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room1 +%WER 18.28 [ 742 / 4058, 65 ins, 155 del, 522 sub ] exp/tri3/decode_dt_simu_1ch/wer_14_0.5_far_room2 +%WER 19.78 [ 800 / 4045, 77 ins, 148 del, 575 sub ] 
exp/tri3/decode_dt_simu_1ch/wer_13_0.0_far_room3 +%WER 5.53 [ 225 / 4071, 36 ins, 29 del, 160 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_near_room1 +%WER 7.81 [ 317 / 4058, 48 ins, 37 del, 232 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_0.0_near_room2 +%WER 10.70 [ 433 / 4045, 47 ins, 86 del, 300 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_0.5_near_room3 exp/tri3/decode_et_real_1ch -%WER 33.09 [ 980 / 2962, 103 ins, 157 del, 720 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 -%WER 33.18 [ 1039 / 3131, 104 ins, 194 del, 741 sub ] [PARTIAL] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 +%WER 33.09 [ 980 / 2962, 103 ins, 157 del, 720 sub ] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 +%WER 33.18 [ 1039 / 3131, 104 ins, 194 del, 741 sub ] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 exp/tri3/decode_et_simu_1ch -%WER 7.47 [ 441 / 5907, 73 ins, 48 del, 320 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room1 -%WER 18.31 [ 1140 / 6226, 128 ins, 191 del, 821 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_13_0.0_far_room2 -%WER 21.81 [ 1280 / 5868, 109 ins, 273 del, 898 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_14_0.5_far_room3 -%WER 7.26 [ 429 / 5907, 77 ins, 42 del, 310 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_16_1.0_near_room1 -%WER 9.52 [ 593 / 6226, 78 ins, 86 del, 429 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_13_0.5_near_room2 -%WER 11.96 [ 702 / 5868, 60 ins, 151 del, 491 sub ] [PARTIAL] exp/tri3/decode_et_simu_1ch/wer_13_1.0_near_room3 +%WER 7.47 [ 441 / 5907, 73 ins, 48 del, 320 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room1 +%WER 18.31 [ 1140 / 6226, 128 ins, 191 del, 821 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.0_far_room2 +%WER 21.81 [ 1280 / 5868, 109 ins, 273 del, 898 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.5_far_room3 +%WER 7.26 [ 429 / 5907, 77 ins, 42 del, 310 sub ] exp/tri3/decode_et_simu_1ch/wer_16_1.0_near_room1 +%WER 9.52 [ 593 / 6226, 78 ins, 86 del, 429 sub ] 
exp/tri3/decode_et_simu_1ch/wer_13_0.5_near_room2 +%WER 11.96 [ 702 / 5868, 60 ins, 151 del, 491 sub ] exp/tri3/decode_et_simu_1ch/wer_13_1.0_near_room3 ######################################## TDNN RESULTs: exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* -%WER 19.62 [ 287 / 1463, 29 ins, 55 del, 203 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.0_far_room1 -%WER 17.65 [ 283 / 1603, 27 ins, 67 del, 189 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_12_0.0_near_room1 -%WER 3.29 [ 134 / 4071, 17 ins, 25 del, 92 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_far_room1 -%WER 7.02 [ 285 / 4058, 27 ins, 53 del, 205 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 -%WER 6.85 [ 277 / 4045, 20 ins, 54 del, 203 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_1.0_far_room3 -%WER 2.73 [ 111 / 4071, 10 ins, 18 del, 83 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_near_room1 -%WER 3.45 [ 140 / 4058, 14 ins, 24 del, 102 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.5_near_room2 -%WER 3.93 [ 159 / 4045, 23 ins, 26 del, 110 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.0_near_room3 +%WER 19.62 [ 287 / 1463, 29 ins, 55 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.0_far_room1 +%WER 17.65 [ 283 / 1603, 27 ins, 67 del, 189 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_12_0.0_near_room1 +%WER 3.29 [ 134 / 4071, 17 ins, 25 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_far_room1 +%WER 7.02 [ 285 / 4058, 27 ins, 53 del, 205 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 +%WER 6.85 [ 277 / 4045, 20 ins, 54 del, 203 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_1.0_far_room3 +%WER 2.73 [ 111 / 4071, 10 ins, 18 del, 83 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_near_room1 +%WER 3.45 [ 140 / 4058, 14 ins, 24 del, 102 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.5_near_room2 +%WER 3.93 [ 159 / 4045, 23 ins, 26 del, 110 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.0_near_room3 exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* -%WER 19.04 [ 564 / 2962, 53 ins, 94 del, 417 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_far_room1 -%WER 18.17 [ 569 / 3131, 48 ins, 123 del, 398 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_near_room1 -%WER 3.72 [ 220 / 5907, 24 ins, 41 del, 155 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room1 -%WER 7.40 [ 461 / 6226, 41 ins, 97 del, 323 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room2 -%WER 7.26 [ 426 / 5868, 33 ins, 101 del, 292 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 -%WER 3.18 [ 188 / 5907, 25 ins, 38 del, 125 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room1 -%WER 4.87 [ 303 / 6226, 31 ins, 64 del, 208 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 -%WER 4.84 [ 284 / 5868, 23 ins, 64 del, 197 sub ] [PARTIAL] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_10_1.0_near_room3 +%WER 19.04 [ 564 / 2962, 53 ins, 94 del, 417 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_far_room1 +%WER 18.17 [ 569 / 3131, 48 ins, 123 del, 398 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_near_room1 +%WER 3.72 [ 220 / 5907, 24 ins, 41 
del, 155 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room1 +%WER 7.40 [ 461 / 6226, 41 ins, 97 del, 323 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room2 +%WER 7.26 [ 426 / 5868, 33 ins, 101 del, 292 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 +%WER 3.18 [ 188 / 5907, 25 ins, 38 del, 125 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room1 +%WER 4.87 [ 303 / 6226, 31 ins, 64 del, 208 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 +%WER 4.84 [ 284 / 5868, 23 ins, 64 del, 197 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_10_1.0_near_room3 diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh index a6ec05e3a43..00adb14d854 100755 --- a/egs/reverb/s5/local/score.sh +++ b/egs/reverb/s5/local/score.sh @@ -60,8 +60,6 @@ fi mkdir -p $dir/scoring_kaldi -cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; - if echo $data | grep -q "real"; then tasks="\ near_room1 far_room1" @@ -71,6 +69,9 @@ else near_room2 far_room2 \ near_room3 far_room3" fi +for task in ${tasks}; do + grep $task $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt_${task}.txt || exit 1; +done if [ $stage -le 0 ]; then @@ -100,7 +101,7 @@ if [ $stage -le 0 ]; then $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ grep $task $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ compute-wer --text --mode=present \ - ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_${wip}_${task} || exit 1; + ark:$dir/scoring_kaldi/test_filt_${task}.txt ark,p:- ">&" $dir/wer_LMWT_${wip}_${task} || exit 1; done done fi @@ -131,7 +132,7 @@ if [ $stage -le 1 ]; then $cmd $dir/scoring_kaldi/log/stats1.log \ cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ - align-text --special-symbol="'***'" 
ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ + align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt_${task}.txt ark:- ark,t:- \| \ utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; @@ -142,7 +143,7 @@ if [ $stage -le 1 ]; then $cmd $dir/scoring_kaldi/log/wer_bootci.log \ compute-wer-bootci --mode=present \ - ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ + ark:$dir/scoring_kaldi/test_filt_${task}.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; fi diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 99661320fe3..a633db8e222 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -69,8 +69,7 @@ nj=92 decode_nj=10 if [ ${stage} -le 1 ]; then - ### Task dependent. You have to make the following data preparation part by yourself. - ### But you can utilize Kaldi recipes in most cases + # data preparation wavdir=$PWD/wav echo "stage 0: Data preparation" local/generate_data.sh --wavdir ${wavdir} ${wsjcam0} From 327aabb29eac343829b59f7f4a567471ac4c6c76 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Tue, 9 Oct 2018 16:59:02 -0400 Subject: [PATCH 14/39] change naming chime5 to reverb --- egs/reverb/s5/local/nnet3/run_ivector_common.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/reverb/s5/local/nnet3/run_ivector_common.sh b/egs/reverb/s5/local/nnet3/run_ivector_common.sh index 422790ff927..29a988a507e 100755 --- a/egs/reverb/s5/local/nnet3/run_ivector_common.sh +++ b/egs/reverb/s5/local/nnet3/run_ivector_common.sh @@ -53,7 +53,7 @@ if [ $stage -le 3 ]; then echo "$0: creating high-resolution MFCC features" mfccdir=data/${train_set}_sp_hires/data if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then - utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/mfcc/chime5-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/mfcc/reverb-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage fi for datadir in ${train_set}_sp ${test_sets}; do @@ -123,7 +123,7 @@ if [ $stage -le 6 ]; then ivectordir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then - utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/ivectors/chime5-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/ivectors/reverb-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage fi From 623a212e56ab005ab5b22b612437cf0dc1825b33 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Thu, 11 Oct 2018 05:46:26 -0400 Subject: [PATCH 15/39] Added Beamformit --- egs/reverb/s5/local/get_results.sh | 25 ++++++++++++++++++++++++ egs/reverb/s5/local/prepare_real_data.sh | 14 ++++++++++++- egs/reverb/s5/local/prepare_simu_data.sh | 20 +++++++++++++++++-- egs/reverb/s5/local/run_wpe.py | 3 +++ egs/reverb/s5/local/run_wpe.sh | 2 ++ egs/reverb/s5/run.sh | 6 ++++-- 6 files changed, 65 insertions(+), 5 deletions(-) diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh index 7eb632dc40f..28aa15bb604 100755 --- a/egs/reverb/s5/local/get_results.sh +++ b/egs/reverb/s5/local/get_results.sh @@ -3,6 +3,7 @@ # "Our baselines" echo "########################################" echo "GMM RESULTs:" +dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit echo "exp/tri3/decode_dt_real_1ch" cat exp/tri3/decode_dt_real_1ch/scoring_kaldi/best_wer* echo "" @@ -14,6 +15,30 @@ cat 
exp/tri3/decode_et_real_1ch/scoring_kaldi/best_wer* echo "" echo "exp/tri3/decode_et_simu_1ch" cat exp/tri3/decode_et_simu_1ch/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_real_2ch_beamformit" +cat exp/tri3/decode_dt_real_2ch_beamformit/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_2ch_beamformit" +cat exp/tri3/decode_dt_simu_2ch_beamformit/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_2ch_beamformit" +cat exp/tri3/decode_et_real_2ch_beamformit/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_simu_2ch_beamformit" +cat exp/tri3/decode_et_simu_2ch_beamformit/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_real_8ch_beamformit" +cat exp/tri3/decode_dt_real_8ch_beamformit/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_8ch_beamformit" +cat exp/tri3/decode_dt_simu_8ch_beamformit/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_8ch_beamformit" +cat exp/tri3/decode_et_real_8ch_beamformit/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_simu_8ch_beamformit" +cat exp/tri3/decode_et_simu_8ch_beamformit/scoring_kaldi/best_wer* echo "########################################" echo "TDNN RESULTs:" echo "exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt*" diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh index d7019697fc4..2880d4bb195 100755 --- a/egs/reverb/s5/local/prepare_real_data.sh +++ b/egs/reverb/s5/local/prepare_real_data.sh @@ -1,6 +1,7 @@ #!/bin/bash # -# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe) +# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe) +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) # Apache 2.0 # This script is adapted from data preparation scripts in the Kaldi reverb recipe # https://github.com/kaldi-asr/kaldi/tree/master/egs/reverb/s5/local @@ -124,3 +125,14 @@ for nch in 1 2 8; do done done +for nch in 2 8; do + for task 
in dt et; do + datadir=data/${task}_real_${nch}ch_beamformit + mkdir -p ${datadir} + sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/-[1-8]_/-bf${nch}_/" > ${datadir}/wav.scp + sort ${dir}/${task}_real_1ch.txt > ${datadir}/text + sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + done +done diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index 1047e6b5cca..b5c6001062c 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -1,6 +1,7 @@ #!/bin/bash # -# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe) +# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe) +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) # Apache 2.0 # This script is adapted from data preparation scripts in the Kaldi reverb recipe # https://github.com/kaldi-asr/kaldi/tree/master/egs/reverb/s5/local @@ -89,7 +90,11 @@ for nch in 1 2 8; do for task in tr dt et; do datadir=data/${task}_simu_${nch}ch mkdir -p ${datadir} - sort ${dir}/${task}_simu_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp + if [ ${task} == 'tr' ]; then + sort ${dir}/${task}_simu_${nch}ch_wav.scp > ${datadir}/wav.scp + else + sort ${dir}/${task}_simu_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp + fi sort ${dir}/${task}_simu_${nch}ch.txt > ${datadir}/text sort ${dir}/${task}_simu_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_simu_${nch}ch.spk2utt > ${datadir}/spk2utt @@ -97,3 +102,14 @@ for nch in 1 2 8; do done done +for nch in 2 8; do + for task in dt et; do + datadir=data/${task}_simu_${nch}ch_beamformit + mkdir -p ${datadir} + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" > ${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text + sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk + sort 
${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + done +done diff --git a/egs/reverb/s5/local/run_wpe.py b/egs/reverb/s5/local/run_wpe.py index dc8c40878f4..9c5e14c107e 100644 --- a/egs/reverb/s5/local/run_wpe.py +++ b/egs/reverb/s5/local/run_wpe.py @@ -1,3 +1,6 @@ +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# Apache 2.0 + import numpy as np import soundfile as sf import time diff --git a/egs/reverb/s5/local/run_wpe.sh b/egs/reverb/s5/local/run_wpe.sh index 1203469abd4..77ff6fffb31 100755 --- a/egs/reverb/s5/local/run_wpe.sh +++ b/egs/reverb/s5/local/run_wpe.sh @@ -1,4 +1,6 @@ #!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# Apache 2.0 . ./cmd.sh . ./path.sh diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index a633db8e222..3d651e639a9 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -2,6 +2,7 @@ # Copyright 2013-2014 MERL (author: Felix Weninger and Shinji Watanabe) # Johns Hopkins University (author: Szu-Jui Chen) +# Johns Hopkins University (author: Aswin Shanmugam Subramanian) # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -58,7 +59,7 @@ fi #training set and test set train_set=tr_simu_8ch -test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch" +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit" # The language models with which to decode (tg_5k or bg_5k) lm="tg_5k" @@ -70,7 +71,7 @@ decode_nj=10 if [ ${stage} -le 1 ]; then # data preparation - wavdir=$PWD/wav + wavdir=${PWD}/wav echo "stage 0: Data preparation" local/generate_data.sh --wavdir ${wavdir} ${wsjcam0} local/prepare_simu_data.sh --wavdir ${wavdir} ${reverb} ${wsjcam0} @@ -79,6 +80,7 @@ fi if [ $stage -le 2 ]; then local/run_wpe.sh + local/run_beamform.sh ${wavdir}/WPE/ fi if [ $stage -le 3 ]; then From ccebba10afe583620ff5f3e3377387443e4eaacc Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Thu, 11 Oct 2018 07:59:10 -0400 Subject: [PATCH 16/39] Updated GMM WPE and Beamformit Results --- egs/reverb/s5/RESULTS | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS index 2bfda91a6a9..b92f144b458 100644 --- a/egs/reverb/s5/RESULTS +++ b/egs/reverb/s5/RESULTS @@ -44,3 +44,78 @@ exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* %WER 3.18 [ 188 / 5907, 25 ins, 38 del, 125 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room1 %WER 4.87 [ 303 / 6226, 31 ins, 64 del, 208 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 %WER 4.84 [ 284 / 5868, 23 ins, 64 del, 197 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_10_1.0_near_room3 + +######################################## +GMM RESULTs with WPE and Beamformit: +exp/tri3/decode_dt_real_1ch +%WER 27.48 [ 402 / 1463, 39 ins, 63 del, 300 sub ] exp/tri3/decode_dt_real_1ch/wer_14_0.5_far_room1 
+%WER 22.33 [ 358 / 1603, 37 ins, 90 del, 231 sub ] exp/tri3/decode_dt_real_1ch/wer_16_0.0_near_room1 + +exp/tri3/decode_dt_simu_1ch +%WER 4.47 [ 182 / 4071, 35 ins, 26 del, 121 sub ] exp/tri3/decode_dt_simu_1ch/wer_14_0.0_far_room1 +%WER 6.78 [ 275 / 4058, 41 ins, 37 del, 197 sub ] exp/tri3/decode_dt_simu_1ch/wer_17_0.0_far_room2 +%WER 5.27 [ 213 / 4045, 27 ins, 32 del, 154 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_1.0_far_room3 +%WER 4.54 [ 185 / 4071, 34 ins, 24 del, 127 sub ] exp/tri3/decode_dt_simu_1ch/wer_15_0.0_near_room1 +%WER 5.54 [ 225 / 4058, 32 ins, 27 del, 166 sub ] exp/tri3/decode_dt_simu_1ch/wer_15_0.5_near_room2 +%WER 5.59 [ 226 / 4045, 29 ins, 37 del, 160 sub ] exp/tri3/decode_dt_simu_1ch/wer_14_0.5_near_room3 + +exp/tri3/decode_et_real_1ch +%WER 20.97 [ 621 / 2962, 113 ins, 79 del, 429 sub ] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 +%WER 21.65 [ 678 / 3131, 77 ins, 138 del, 463 sub ] exp/tri3/decode_et_real_1ch/wer_16_1.0_near_room1 + +exp/tri3/decode_et_simu_1ch +%WER 5.54 [ 327 / 5907, 65 ins, 40 del, 222 sub ] exp/tri3/decode_et_simu_1ch/wer_16_0.5_far_room1 +%WER 7.48 [ 466 / 6226, 64 ins, 61 del, 341 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room2 +%WER 7.11 [ 417 / 5868, 64 ins, 64 del, 289 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.5_far_room3 +%WER 5.50 [ 325 / 5907, 58 ins, 44 del, 223 sub ] exp/tri3/decode_et_simu_1ch/wer_17_1.0_near_room1 +%WER 6.23 [ 388 / 6226, 60 ins, 52 del, 276 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_near_room2 +%WER 7.09 [ 416 / 5868, 67 ins, 60 del, 289 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.5_near_room3 + +exp/tri3/decode_dt_real_2ch_beamformit +%WER 25.91 [ 379 / 1463, 32 ins, 72 del, 275 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_16_1.0_far_room1 +%WER 21.46 [ 344 / 1603, 36 ins, 87 del, 221 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_0.0_near_room1 + +exp/tri3/decode_dt_simu_2ch_beamformit +%WER 4.91 [ 200 / 4071, 42 ins, 26 del, 132 sub ] 
exp/tri3/decode_dt_simu_2ch_beamformit/wer_17_0.0_far_room1 +%WER 6.38 [ 259 / 4058, 35 ins, 38 del, 186 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_15_1.0_far_room2 +%WER 5.12 [ 207 / 4045, 34 ins, 29 del, 144 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_13_1.0_far_room3 +%WER 5.21 [ 212 / 4071, 56 ins, 27 del, 129 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_16_0.0_near_room1 +%WER 5.32 [ 216 / 4058, 30 ins, 33 del, 153 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_17_1.0_near_room2 +%WER 5.07 [ 205 / 4045, 35 ins, 26 del, 144 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_14_0.0_near_room3 + +exp/tri3/decode_et_real_2ch_beamformit +%WER 17.45 [ 517 / 2962, 74 ins, 91 del, 352 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_17_0.5_far_room1 +%WER 18.78 [ 588 / 3131, 88 ins, 99 del, 401 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_16_0.5_near_room1 + +exp/tri3/decode_et_simu_2ch_beamformit +%WER 5.43 [ 321 / 5907, 63 ins, 44 del, 214 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_far_room1 +%WER 6.75 [ 420 / 6226, 50 ins, 62 del, 308 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_14_1.0_far_room2 +%WER 6.87 [ 403 / 5868, 75 ins, 47 del, 281 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_13_0.0_far_room3 +%WER 5.59 [ 330 / 5907, 70 ins, 46 del, 214 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room1 +%WER 5.89 [ 367 / 6226, 45 ins, 62 del, 260 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room2 +%WER 6.61 [ 388 / 5868, 55 ins, 67 del, 266 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_0.5_near_room3 + +exp/tri3/decode_dt_real_8ch_beamformit +%WER 19.75 [ 289 / 1463, 41 ins, 38 del, 210 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_15_1.0_far_room1 +%WER 15.41 [ 247 / 1603, 27 ins, 47 del, 173 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_15_1.0_near_room1 + +exp/tri3/decode_dt_simu_8ch_beamformit +%WER 4.77 [ 194 / 4071, 39 ins, 27 del, 128 sub ] 
exp/tri3/decode_dt_simu_8ch_beamformit/wer_17_0.5_far_room1 +%WER 5.45 [ 221 / 4058, 46 ins, 24 del, 151 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_16_0.0_far_room2 +%WER 4.20 [ 170 / 4045, 25 ins, 26 del, 119 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_13_1.0_far_room3 +%WER 4.91 [ 200 / 4071, 55 ins, 20 del, 125 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_0.0_near_room1 +%WER 5.22 [ 212 / 4058, 33 ins, 26 del, 153 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_15_1.0_near_room2 +%WER 4.08 [ 165 / 4045, 28 ins, 20 del, 117 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_13_1.0_near_room3 + +exp/tri3/decode_et_real_8ch_beamformit +%WER 14.38 [ 426 / 2962, 72 ins, 71 del, 283 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_15_1.0_far_room1 +%WER 13.86 [ 434 / 3131, 68 ins, 70 del, 296 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_17_1.0_near_room1 + +exp/tri3/decode_et_simu_8ch_beamformit +%WER 5.59 [ 330 / 5907, 87 ins, 36 del, 207 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_far_room1 +%WER 6.10 [ 380 / 6226, 51 ins, 55 del, 274 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_15_1.0_far_room2 +%WER 6.24 [ 366 / 5868, 57 ins, 51 del, 258 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_far_room3 +%WER 5.62 [ 332 / 5907, 74 ins, 44 del, 214 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_16_1.0_near_room1 +%WER 5.73 [ 357 / 6226, 56 ins, 57 del, 244 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room2 +%WER 5.93 [ 348 / 5868, 62 ins, 52 del, 234 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_near_room3 +######################################## From a4b6fb7d2de53aa802fc96e08d52f1457a25fabd Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Thu, 11 Oct 2018 08:02:17 -0400 Subject: [PATCH 17/39] Included the beamforming script --- egs/reverb/s5/local/run_beamform.sh | 142 ++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100755 egs/reverb/s5/local/run_beamform.sh 
diff --git a/egs/reverb/s5/local/run_beamform.sh b/egs/reverb/s5/local/run_beamform.sh new file mode 100755 index 00000000000..0549b2e34f2 --- /dev/null +++ b/egs/reverb/s5/local/run_beamform.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe) +# Copyright 2018, Johns Hopkins University (Author: Aswin Shanmugam Subramanian) + +. ./cmd.sh +. ./path.sh + +# Config: +nj=20 +cmd=run.pl + +. utils/parse_options.sh || exit 1; + +if [ $# != 1 ]; then + echo "Wrong #arguments ($#, expected 1)" + echo "Usage: local/run_beamform.sh [options] <wav-out-dir>" + echo "main options (for others, see top of script file)" + echo " --nj <nj> # number of parallel jobs" + echo " --cmd <cmd> # Command to run in parallel with" + exit 1; +fi + +odir=$1 +dir=${PWD}/data/local/data + +if [ -z $BEAMFORMIT ] ; then + export BEAMFORMIT=$KALDI_ROOT/tools/extras/BeamformIt +fi +export PATH=${PATH}:$BEAMFORMIT +! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/; extras/install_beamformit.sh;'" && exit 1 + +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ...
'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +for task in dt et; do + for nch in 2 8; do + wdir=exp/beamform_real_${task}_${nch}ch + mkdir -p $wdir/log + arrays=$wdir/channels + output_wavfiles=$wdir/wavfiles.list + if [ ${nch} == 2 ]; then + allwavs=`cat ${dir}/${task}_real_${nch}ch_wpe_wav.scp | cut -d " " -f2` + allwavs_beamformit=`cat data/${task}_real_${nch}ch_beamformit/wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==1' > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==0' > $wdir/channels.2nd + echo $allwavs_beamformit | tr ' ' '\n' | rev | sort | rev | awk -F 'WPE/' '{print $2}' | awk -F '.wav' '{print $1}' > $output_wavfiles + paste -d" " $output_wavfiles $wdir/channels.1st $wdir/channels.2nd > $arrays + elif [ ${nch} == 8 ]; then + allwavs=`cat ${dir}/${task}_real_${nch}ch_wpe_wav.scp | cut -d " " -f2` + allwavs_beamformit=`cat data/${task}_real_${nch}ch_beamformit/wav.scp | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==1' > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==2' > $wdir/channels.2nd + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==3' > $wdir/channels.3rd + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==4' > $wdir/channels.4th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==5' > $wdir/channels.5th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==6' > $wdir/channels.6th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==7' > $wdir/channels.7th + echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==0' > $wdir/channels.8th + echo $allwavs_beamformit | tr ' ' '\n' | rev | sort | rev | awk -F 'WPE/' '{print $2}' | awk -F '.wav' '{print $1}' > $output_wavfiles + paste -d" " $output_wavfiles $wdir/channels.1st $wdir/channels.2nd $wdir/channels.3rd $wdir/channels.4th $wdir/channels.5th $wdir/channels.6th 
$wdir/channels.7th $wdir/channels.8th > $arrays + fi + # split the list for parallel processing + split_wavfiles="" + for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" + done + utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + + echo -e "Beamforming - $task - real - $nch ch\n" + # making a shell script for each job + for n in `seq $nj`; do + cat <<-EOF > $wdir/log/beamform.$n.sh + while read line; do + $BEAMFORMIT/BeamformIt -s \$line -c $arrays \ + --config_file `pwd`/conf/reverb_beamformit.cfg \ + --result_dir $odir + done < $output_wavfiles.$n + EOF + done + + chmod a+x $wdir/log/beamform.*.sh + $cmd JOB=1:$nj $wdir/log/beamform.JOB.log \ + $wdir/log/beamform.JOB.sh + done +done + +for task in dt et; do + for nch in 2 8; do + wdir=exp/beamform_simu_${task}_${nch}ch + mkdir -p $wdir/log + arrays=$wdir/channels + output_wavfiles=$wdir/wavfiles.list + if [ ${nch} == 2 ]; then + allwavs=`cat ${dir}/${task}_simu_${nch}ch_wpe_wav.scp | grep "ch[1-2].wav" | cut -d " " -f2` + allwavs_beamformit=`cat data/${task}_simu_${nch}ch_beamformit/wav.scp | grep "bf2.wav" | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | grep 'ch1' | sort > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | grep 'ch2' | sort > $wdir/channels.2nd + echo $allwavs_beamformit | tr ' ' '\n' | awk -F 'WPE/' '{print $2}' | sort | awk -F '.wav' '{print $1}' > $output_wavfiles + paste -d" " $output_wavfiles $wdir/channels.1st $wdir/channels.2nd > $arrays + elif [ ${nch} == 8 ]; then + allwavs=`cat ${dir}/${task}_simu_${nch}ch_wpe_wav.scp | grep "ch[1-8].wav" | cut -d " " -f2` + allwavs_beamformit=`cat data/${task}_simu_${nch}ch_beamformit/wav.scp | grep "bf8.wav" | cut -d " " -f2` + echo $allwavs | tr ' ' '\n' | grep 'ch1' | sort > $wdir/channels.1st + echo $allwavs | tr ' ' '\n' | grep 'ch2' | sort > $wdir/channels.2nd + echo $allwavs | tr ' ' '\n' | grep 'ch3' | sort > $wdir/channels.3rd + echo $allwavs | tr ' ' '\n' | grep 'ch4' | sort > $wdir/channels.4th + 
echo $allwavs | tr ' ' '\n' | grep 'ch5' | sort > $wdir/channels.5th + echo $allwavs | tr ' ' '\n' | grep 'ch6' | sort > $wdir/channels.6th + echo $allwavs | tr ' ' '\n' | grep 'ch7' | sort > $wdir/channels.7th + echo $allwavs | tr ' ' '\n' | grep 'ch8' | sort > $wdir/channels.8th + echo $allwavs_beamformit | tr ' ' '\n' | awk -F 'WPE/' '{print $2}' | sort | awk -F '.wav' '{print $1}' > $output_wavfiles + paste -d" " $output_wavfiles $wdir/channels.1st $wdir/channels.2nd $wdir/channels.3rd $wdir/channels.4th $wdir/channels.5th $wdir/channels.6th $wdir/channels.7th $wdir/channels.8th > $arrays + fi + # split the list for parallel processing + split_wavfiles="" + for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" + done + utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + + echo -e "Beamforming - $task - simu - $nch ch\n" + # making a shell script for each job + for n in `seq $nj`; do + cat <<-EOF > $wdir/log/beamform.$n.sh + while read line; do + $BEAMFORMIT/BeamformIt -s \$line -c $arrays \ + --config_file `pwd`/conf/reverb_beamformit.cfg \ + --result_dir $odir + done < $output_wavfiles.$n + EOF + done + + chmod a+x $wdir/log/beamform.*.sh + $cmd JOB=1:$nj $wdir/log/beamform.JOB.log \ + $wdir/log/beamform.JOB.sh + done +done +echo "`basename $0` Done." 
From c9e29fee76090d7a10cd1c0c923ab895b1703c1b Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Thu, 11 Oct 2018 08:29:30 -0400 Subject: [PATCH 18/39] Added Beamformit config file --- egs/reverb/s5/conf/reverb_beamformit.cfg | 50 ++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100755 egs/reverb/s5/conf/reverb_beamformit.cfg diff --git a/egs/reverb/s5/conf/reverb_beamformit.cfg b/egs/reverb/s5/conf/reverb_beamformit.cfg new file mode 100755 index 00000000000..70fdd858651 --- /dev/null +++ b/egs/reverb/s5/conf/reverb_beamformit.cfg @@ -0,0 +1,50 @@ +#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/) + +# scrolling size to compute the delays +scroll_size = 250 + +# cross correlation computation window size +window_size = 500 + +#amount of maximum points for the xcorrelation taken into account +nbest_amount = 4 + +#flag whether to apply an automatic noise thresholding +do_noise_threshold = 1 + +#Percentage of frames with lower xcorr taken as noisy +noise_percent = 10 + +######## acoustic modelling parameters + +#transition probabilities weight for multichannel decoding +trans_weight_multi = 25 +trans_weight_nbest = 25 + +### + +#flag whether to print the features after setting them, or not +print_features = 1 + +#flag whether to use the bad frames in the sum process +do_avoid_bad_frames = 1 + +#flag to use the best channel (SNR) as a reference +#defined from command line +do_compute_reference = 1 + +#flag whether to use a uem file or not (process all the file) +do_use_uem_file = 0 + +#flag whether to use an adaptive weights scheme or fixed weights +do_adapt_weights = 1 + +#flag whether to output the sph files or just run the system to create the auxiliary files +do_write_sph_files = 1 + +####directories where to store/retrieve info#### +#channels_file = ./cfg-files/channels + +#show needs to be passed as argument normally, here a default one is given just in case +#show_id = Ttmp + From
1c771b70418a52f4dc7e5c44c28a4170743ec356 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Mon, 15 Oct 2018 13:02:33 -0400 Subject: [PATCH 19/39] Store 1ch and 2ch WPE wavefiles in separate directories --- egs/reverb/s5/local/prepare_real_data.sh | 6 +++--- egs/reverb/s5/local/prepare_simu_data.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh index 2880d4bb195..7c1036477d2 100755 --- a/egs/reverb/s5/local/prepare_real_data.sh +++ b/egs/reverb/s5/local/prepare_real_data.sh @@ -81,10 +81,10 @@ for nch in 1 2 8; do for task in dt et; do if [ ${task} == 'dt' ]; then audiodir=${reverb}/MC_WSJ_AV_Dev - audiodir_wpe=${wavdir}/WPE/MC_WSJ_AV_Dev + audiodir_wpe=${wavdir}/WPE/${nch}ch/MC_WSJ_AV_Dev elif [ ${task} == 'et' ]; then audiodir=${reverb}/MC_WSJ_AV_Eval - audiodir_wpe=${wavdir}/WPE/MC_WSJ_AV_Eval + audiodir_wpe=${wavdir}/WPE/${nch}ch/MC_WSJ_AV_Eval fi for x in `ls ${taskdir} | grep RealData | grep _${task}_`; do perl -se 'while(<>){m:^\S+/[\w\-]*_(T\w{6,7})\.wav$: || die "Bad line $_"; $id = lc $1; print "$id $dir$_";}' -- -dir=${audiodir} ${taskdir}/$x |\ @@ -129,7 +129,7 @@ for nch in 2 8; do for task in dt et; do datadir=data/${task}_real_${nch}ch_beamformit mkdir -p ${datadir} - sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/-[1-8]_/-bf${nch}_/" > ${datadir}/wav.scp + sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/-[1-8]_/-bf${nch}_/" | sed -e "s/1ch/${nch}ch/" > ${datadir}/wav.scp sort ${dir}/${task}_real_1ch.txt > ${datadir}/text sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index b5c6001062c..c2a68bdb26e 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -58,12 +58,12 @@ for nch in 1 2 8; do task=tr for x in 
`ls ${taskdir} | grep SimData | grep _${task}_`; do - perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/${nch}ch/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wpe_wav.scp for task in dt et; do for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do - perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${wavdir}/WPE/${nch}ch/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wpe_wav.scp done @@ -106,7 +106,7 @@ for nch in 2 8; do for task in dt et; do datadir=data/${task}_simu_${nch}ch_beamformit mkdir -p ${datadir} - sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" > ${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" | sed -e "s/1ch/${nch}ch/" > ${datadir}/wav.scp sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt From c2108ce47162108deddb00208a58c01cfa9bb8bd Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Mon, 15 Oct 2018 16:56:42 -0400 Subject: [PATCH 20/39] rm check_tools.sh and bug fixed in run.sh --- egs/reverb/s5/local/check_tools.sh | 53 ------------------------------ egs/reverb/s5/run.sh | 4 +-- 2 files changed, 1 insertion(+), 56 deletions(-) delete mode 100755 egs/reverb/s5/local/check_tools.sh diff --git a/egs/reverb/s5/local/check_tools.sh b/egs/reverb/s5/local/check_tools.sh deleted file mode 100755 index 
698d8e411dd..00000000000 --- a/egs/reverb/s5/local/check_tools.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -u - -# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -[ -f ./path.sh ] && . ./path.sh - -command -v uconv &>/dev/null \ - || { echo >&2 "uconv not found on PATH. You will have to install ICU4C"; exit 1; } - -command -v ngram &>/dev/null \ - || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh to install it"; exit 1; } - -if [ -z ${LIBLBFGS} ]; then - echo >&2 "SRILM is not compiled with the support of MaxEnt models." - echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" - echo >&2 "which will take care of compiling the SRILM with MaxEnt support" - exit 1; -fi - -sox=`command -v sox 2>/dev/null` \ - || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; exit 1; } - -# If sox is found on path, check if the version is correct -if [ ! -z "$sox" ]; then - sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'` - if [[ ! $sox_version =~ v14.4.* ]]; then - echo "Unsupported sox version $sox_version found on path. You will need version v14.4.0 and higher." - exit 1 - fi -fi - -command -v phonetisaurus-align &>/dev/null \ - || { echo >&2 "Phonetisaurus not found on PATH. 
Please use the script $KALDI_ROOT/tools/extras/install_phonetisaurus.sh to install it"; exit 1; } - -command -v ffmpeg &>/dev/null \ - || { echo >&2 "FFMPEG not found on PATH. You will have to install FFMPEG"; exit 1; } - -exit 0 - - diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 3d651e639a9..39c56207da7 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -32,8 +32,6 @@ if [ ! `which matlab` ]; then exit 1 fi -./local/check_tools.sh || exit 1 - . ./cmd.sh . ./path.sh @@ -69,9 +67,9 @@ nj=92 # number of jobs for decoding decode_nj=10 +wavdir=${PWD}/wav if [ ${stage} -le 1 ]; then # data preparation - wavdir=${PWD}/wav echo "stage 0: Data preparation" local/generate_data.sh --wavdir ${wavdir} ${wsjcam0} local/prepare_simu_data.sh --wavdir ${wavdir} ${reverb} ${wsjcam0} From d1e5998abe819d37ddff70bf7dc2db3abf0adc4e Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Tue, 16 Oct 2018 07:40:27 -0400 Subject: [PATCH 21/39] remove clean room in recog lists, add wpe only in recog set --- egs/reverb/s5/local/get_results.sh | 25 ++++++++++++++++++++- egs/reverb/s5/local/prepare_real_data.sh | 28 ++++++++++++++---------- egs/reverb/s5/local/prepare_simu_data.sh | 15 +++++++++---- egs/reverb/s5/local/run_beamform.sh | 2 +- egs/reverb/s5/local/run_wpe.sh | 4 ++-- 5 files changed, 54 insertions(+), 20 deletions(-) diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh index 28aa15bb604..e1fca60a2dd 100755 --- a/egs/reverb/s5/local/get_results.sh +++ b/egs/reverb/s5/local/get_results.sh @@ -3,7 +3,6 @@ # "Our baselines" echo "########################################" echo "GMM RESULTs:" -dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit echo "exp/tri3/decode_dt_real_1ch" cat exp/tri3/decode_dt_real_1ch/scoring_kaldi/best_wer* echo "" @@ -16,6 +15,30 @@ echo "" echo 
"exp/tri3/decode_et_simu_1ch" cat exp/tri3/decode_et_simu_1ch/scoring_kaldi/best_wer* echo "" +echo "exp/tri3/decode_dt_real_2ch_wpe" +cat exp/tri3/decode_dt_real_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_2ch_wpe" +cat exp/tri3/decode_dt_simu_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_2ch_wpe" +cat exp/tri3/decode_et_real_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_simu_2ch_wpe" +cat exp/tri3/decode_et_simu_2ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_real_8ch_wpe" +cat exp/tri3/decode_dt_real_8ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_8ch_wpe" +cat exp/tri3/decode_dt_simu_8ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_8ch_wpe" +cat exp/tri3/decode_et_real_8ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_simu_8ch_wpe" +cat exp/tri3/decode_et_simu_8ch_wpe/scoring_kaldi/best_wer* +echo "" echo "exp/tri3/decode_dt_real_2ch_beamformit" cat exp/tri3/decode_dt_real_2ch_beamformit/scoring_kaldi/best_wer* echo "" diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh index 7c1036477d2..4584c0acd60 100755 --- a/egs/reverb/s5/local/prepare_real_data.sh +++ b/egs/reverb/s5/local/prepare_real_data.sh @@ -122,17 +122,21 @@ for nch in 1 2 8; do sort ${dir}/${task}_real_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_real_${nch}ch.spk2utt > ${datadir}/spk2utt ./utils/fix_data_dir.sh ${datadir} - done -done - -for nch in 2 8; do - for task in dt et; do - datadir=data/${task}_real_${nch}ch_beamformit - mkdir -p ${datadir} - sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/-[1-8]_/-bf${nch}_/" | sed -e "s/1ch/${nch}ch/" > ${datadir}/wav.scp - sort ${dir}/${task}_real_1ch.txt > ${datadir}/text - sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk - sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt - ./utils/fix_data_dir.sh ${datadir} + if [ 
${nch} != 1 ]; then + datadir=data/${task}_real_${nch}ch_beamformit + mkdir -p ${datadir} + sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/-[1-8]_/-bf${nch}_/" | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_real_1ch.txt > ${datadir}/text + sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + datadir=data/${task}_real_${nch}ch_wpe + mkdir -p ${datadir} + sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_real_1ch.txt > ${datadir}/text + sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + fi done done diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index c2a68bdb26e..580bb4d8702 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -50,7 +50,7 @@ for nch in 1 2 8; do sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wav.scp for task in dt et; do - for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e far -e near`; do perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${reverb}/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wav.scp @@ -62,7 +62,7 @@ for nch in 1 2 8; do sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wpe_wav.scp for task in dt et; do - for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e far -e near`; do perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- 
-dir=${wavdir}/WPE/${nch}ch/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wpe_wav.scp @@ -70,7 +70,7 @@ for nch in 1 2 8; do # make a transcript for task in tr dt et; do - for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e far -e near`; do perl -e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' ${taskdir}/$x |\ perl local/find_transcripts_singledot.pl ${dir}/${task}.dot |\ sed -e "s/^\(...\)/\1_${x}_\1/" @@ -106,7 +106,14 @@ for nch in 2 8; do for task in dt et; do datadir=data/${task}_simu_${nch}ch_beamformit mkdir -p ${datadir} - sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" | sed -e "s/1ch/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text + sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + datadir=data/${task}_simu_${nch}ch_wpe + mkdir -p ${datadir} + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt diff --git a/egs/reverb/s5/local/run_beamform.sh b/egs/reverb/s5/local/run_beamform.sh index 0549b2e34f2..1c8aade7287 100755 --- a/egs/reverb/s5/local/run_beamform.sh +++ b/egs/reverb/s5/local/run_beamform.sh @@ -7,7 +7,7 @@ . ./path.sh # Config: -nj=20 +nj=50 cmd=run.pl . 
utils/parse_options.sh || exit 1; diff --git a/egs/reverb/s5/local/run_wpe.sh b/egs/reverb/s5/local/run_wpe.sh index 77ff6fffb31..d1ea56c6c55 100755 --- a/egs/reverb/s5/local/run_wpe.sh +++ b/egs/reverb/s5/local/run_wpe.sh @@ -6,7 +6,7 @@ . ./path.sh # Config: -nj=20 +nj=50 cmd=run.pl . utils/parse_options.sh || exit 1; @@ -104,7 +104,7 @@ for task in dt et; do done done -for task in tr dt et; do +for task in dt et; do for nch in 1 2 8; do wdir=exp/wpe_simu_${task}_${nch}ch mkdir -p $wdir/log From 3dc5bb812da77a3de31349fff542ecd1b0139912 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Tue, 16 Oct 2018 07:46:51 -0400 Subject: [PATCH 22/39] added wpe recog sets in run.sh --- egs/reverb/s5/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 39c56207da7..7816a75c909 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -57,7 +57,7 @@ fi #training set and test set train_set=tr_simu_8ch -test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit" +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_2ch_wpe dt_simu_2ch_wpe et_real_2ch_wpe et_simu_2ch_wpe dt_real_8ch_wpe dt_simu_8ch_wpe et_real_8ch_wpe et_simu_8ch_wpe" # The language models with which to decode (tg_5k or bg_5k) lm="tg_5k" From ecda4c1fdda92d4ab693229a1e2b025750ff0df6 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Tue, 16 Oct 2018 10:44:54 -0400 Subject: [PATCH 23/39] Added 1ch without WPE also to recog sets --- egs/reverb/s5/local/prepare_real_data.sh | 16 ++++----- egs/reverb/s5/local/prepare_simu_data.sh | 43 
+++++++++++------------- egs/reverb/s5/run.sh | 2 +- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh index 4584c0acd60..2da51b9786b 100755 --- a/egs/reverb/s5/local/prepare_real_data.sh +++ b/egs/reverb/s5/local/prepare_real_data.sh @@ -117,7 +117,7 @@ for nch in 1 2 8; do for task in dt et; do datadir=data/${task}_real_${nch}ch mkdir -p ${datadir} - sort ${dir}/${task}_real_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp + sort ${dir}/${task}_real_${nch}ch_wav.scp > ${datadir}/wav.scp sort ${dir}/${task}_real_${nch}ch.txt > ${datadir}/text sort ${dir}/${task}_real_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_real_${nch}ch.spk2utt > ${datadir}/spk2utt @@ -130,13 +130,13 @@ for nch in 1 2 8; do sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt ./utils/fix_data_dir.sh ${datadir} - datadir=data/${task}_real_${nch}ch_wpe - mkdir -p ${datadir} - sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp - sort ${dir}/${task}_real_1ch.txt > ${datadir}/text - sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk - sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt - ./utils/fix_data_dir.sh ${datadir} fi + datadir=data/${task}_real_${nch}ch_wpe + mkdir -p ${datadir} + sort ${dir}/${task}_real_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_real_1ch.txt > ${datadir}/text + sort ${dir}/${task}_real_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_real_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} done done diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index 580bb4d8702..b2019c37e01 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -90,33 +90,28 @@ for nch in 1 2 8; do for 
task in tr dt et; do datadir=data/${task}_simu_${nch}ch mkdir -p ${datadir} - if [ ${task} == 'tr' ]; then - sort ${dir}/${task}_simu_${nch}ch_wav.scp > ${datadir}/wav.scp - else - sort ${dir}/${task}_simu_${nch}ch_wpe_wav.scp > ${datadir}/wav.scp - fi + sort ${dir}/${task}_simu_${nch}ch_wav.scp > ${datadir}/wav.scp sort ${dir}/${task}_simu_${nch}ch.txt > ${datadir}/text sort ${dir}/${task}_simu_${nch}ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_simu_${nch}ch.spk2utt > ${datadir}/spk2utt ./utils/fix_data_dir.sh ${datadir} - done -done - -for nch in 2 8; do - for task in dt et; do - datadir=data/${task}_simu_${nch}ch_beamformit - mkdir -p ${datadir} - sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp - sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text - sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk - sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt - ./utils/fix_data_dir.sh ${datadir} - datadir=data/${task}_simu_${nch}ch_wpe - mkdir -p ${datadir} - sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp - sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text - sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk - sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt - ./utils/fix_data_dir.sh ${datadir} + if [ ${task} != 'tr' ]; then + datadir=data/${task}_simu_${nch}ch_wpe + mkdir -p ${datadir} + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > ${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text + sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + if [ ${nch} != 1 ]; then + datadir=data/${task}_simu_${nch}ch_beamformit + mkdir -p ${datadir} + sort ${dir}/${task}_simu_1ch_wpe_wav.scp | sed -e "s/ch1/bf${nch}/" | sed -e "s/WPE\/1ch/WPE\/${nch}ch/" > 
${datadir}/wav.scp + sort ${dir}/${task}_simu_1ch.txt > ${datadir}/text + sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} + fi + fi done done diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 7816a75c909..3cc3efca9e7 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -57,7 +57,7 @@ fi #training set and test set train_set=tr_simu_8ch -test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_2ch_wpe dt_simu_2ch_wpe et_real_2ch_wpe et_simu_2ch_wpe dt_real_8ch_wpe dt_simu_8ch_wpe et_real_8ch_wpe et_simu_8ch_wpe" +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe dt_real_2ch_wpe dt_simu_2ch_wpe et_real_2ch_wpe et_simu_2ch_wpe dt_real_8ch_wpe dt_simu_8ch_wpe et_real_8ch_wpe et_simu_8ch_wpe" # The language models with which to decode (tg_5k or bg_5k) lm="tg_5k" From d1947dfbed782f735fa0398738a54e8bd8bcfde2 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Tue, 16 Oct 2018 15:41:54 -0400 Subject: [PATCH 24/39] bug fix for code refactoring in previous commit --- egs/reverb/s5/local/prepare_simu_data.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index b2019c37e01..cfdc226dd1e 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -69,7 +69,14 @@ for nch in 1 2 8; do done # make a transcript - for task in tr dt et; do + task=tr 
+ for x in `ls ${taskdir} | grep SimData | grep _${task}_`; do + perl -e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' ${taskdir}/$x |\ + perl local/find_transcripts_singledot.pl ${dir}/${task}.dot |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_simu_${nch}ch.trans1 || exit 1; + cat ${dir}/${task}_simu_${nch}ch.trans1 | local/normalize_transcript.pl ${noiseword} > ${dir}/${task}_simu_${nch}ch.txt || exit 1; + for task in dt et; do for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e far -e near`; do perl -e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' ${taskdir}/$x |\ perl local/find_transcripts_singledot.pl ${dir}/${task}.dot |\ From 21f233750cb1950c6277df60aa402c82b9154f9e Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Wed, 17 Oct 2018 10:39:55 -0400 Subject: [PATCH 25/39] change the data storage place on the grid --- egs/reverb/s5/local/nnet3/run_ivector_common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/local/nnet3/run_ivector_common.sh b/egs/reverb/s5/local/nnet3/run_ivector_common.sh index 29a988a507e..3af3ad77565 100755 --- a/egs/reverb/s5/local/nnet3/run_ivector_common.sh +++ b/egs/reverb/s5/local/nnet3/run_ivector_common.sh @@ -53,7 +53,7 @@ if [ $stage -le 3 ]; then echo "$0: creating high-resolution MFCC features" mfccdir=data/${train_set}_sp_hires/data if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then - utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/mfcc/reverb-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + utils/create_split_dir.pl /export/b1{4,5,6,8}/$USER/kaldi-data/mfcc/reverb-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage fi for datadir in ${train_set}_sp ${test_sets}; do From 31f82f1491fbb5e2a2f7507b7d04a9bb6082344f Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Wed, 7 Nov 2018 16:15:19 -0500 Subject: [PATCH 26/39] Added dereverberation measures, cln evaluation and updated RESULTS --- egs/reverb/s5/RESULTS | 319 +++++++++++++------ egs/reverb/s5/local/compute_se_scores.sh | 39 +++ egs/reverb/s5/local/download_se_eval_tool.sh | 33 ++ egs/reverb/s5/local/get_results.sh | 19 +- egs/reverb/s5/local/prepare_simu_data.sh | 26 ++ egs/reverb/s5/local/score.sh | 3 + egs/reverb/s5/run.sh | 40 ++- 7 files changed, 370 insertions(+), 109 deletions(-) create mode 100755 egs/reverb/s5/local/compute_se_scores.sh create mode 100755 egs/reverb/s5/local/download_se_eval_tool.sh diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS index b92f144b458..bc994e05d48 100644 --- a/egs/reverb/s5/RESULTS +++ b/egs/reverb/s5/RESULTS @@ -1,121 +1,250 @@ ######################################## GMM RESULTs: exp/tri3/decode_dt_real_1ch -%WER 34.18 [ 500 / 1463, 24 ins, 125 del, 351 sub ] exp/tri3/decode_dt_real_1ch/wer_17_1.0_far_room1 -%WER 29.63 [ 475 / 1603, 24 ins, 127 del, 324 sub ] exp/tri3/decode_dt_real_1ch/wer_15_0.5_near_room1 +%WER 34.59 [ 506 / 1463, 40 ins, 113 del, 353 sub ] exp/tri3/decode_dt_real_1ch/wer_15_0.5_far_room1 +%WER 30.26 [ 485 / 1603, 42 ins, 112 del, 331 sub ] exp/tri3/decode_dt_real_1ch/wer_17_0.0_near_room1 exp/tri3/decode_dt_simu_1ch -%WER 6.78 [ 276 / 4071, 38 ins, 42 del, 196 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room1 -%WER 18.28 [ 742 / 4058, 65 ins, 155 del, 522 sub ] exp/tri3/decode_dt_simu_1ch/wer_14_0.5_far_room2 -%WER 19.78 [ 800 / 4045, 77 
ins, 148 del, 575 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_0.0_far_room3 -%WER 5.53 [ 225 / 4071, 36 ins, 29 del, 160 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_near_room1 -%WER 7.81 [ 317 / 4058, 48 ins, 37 del, 232 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_0.0_near_room2 -%WER 10.70 [ 433 / 4045, 47 ins, 86 del, 300 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_0.5_near_room3 +%WER 6.73 [ 274 / 4071, 42 ins, 40 del, 192 sub ] exp/tri3/decode_dt_simu_1ch/wer_15_0.0_far_room1 +%WER 18.38 [ 746 / 4058, 83 ins, 133 del, 530 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_0.5_far_room2 +%WER 19.70 [ 797 / 4045, 73 ins, 183 del, 541 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room3 +%WER 5.43 [ 221 / 4071, 34 ins, 38 del, 149 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_1.0_near_room1 +%WER 7.74 [ 314 / 4058, 54 ins, 47 del, 213 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_0.5_near_room2 +%WER 7.86 [ 318 / 4045, 38 ins, 53 del, 227 sub ] exp/tri3/decode_dt_simu_1ch/wer_16_0.0_near_room3 exp/tri3/decode_et_real_1ch -%WER 33.09 [ 980 / 2962, 103 ins, 157 del, 720 sub ] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 -%WER 33.18 [ 1039 / 3131, 104 ins, 194 del, 741 sub ] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 +%WER 32.82 [ 972 / 2962, 93 ins, 178 del, 701 sub ] exp/tri3/decode_et_real_1ch/wer_17_0.0_far_room1 +%WER 33.15 [ 1038 / 3131, 111 ins, 177 del, 750 sub ] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 exp/tri3/decode_et_simu_1ch -%WER 7.47 [ 441 / 5907, 73 ins, 48 del, 320 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room1 -%WER 18.31 [ 1140 / 6226, 128 ins, 191 del, 821 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.0_far_room2 -%WER 21.81 [ 1280 / 5868, 109 ins, 273 del, 898 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.5_far_room3 -%WER 7.26 [ 429 / 5907, 77 ins, 42 del, 310 sub ] exp/tri3/decode_et_simu_1ch/wer_16_1.0_near_room1 -%WER 9.52 [ 593 / 6226, 78 ins, 86 del, 429 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.5_near_room2 -%WER 11.96 [ 702 / 5868, 60 ins, 
151 del, 491 sub ] exp/tri3/decode_et_simu_1ch/wer_13_1.0_near_room3 -######################################## -TDNN RESULTs: -exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* -%WER 19.62 [ 287 / 1463, 29 ins, 55 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.0_far_room1 -%WER 17.65 [ 283 / 1603, 27 ins, 67 del, 189 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_12_0.0_near_room1 -%WER 3.29 [ 134 / 4071, 17 ins, 25 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_far_room1 -%WER 7.02 [ 285 / 4058, 27 ins, 53 del, 205 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 -%WER 6.85 [ 277 / 4045, 20 ins, 54 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_1.0_far_room3 -%WER 2.73 [ 111 / 4071, 10 ins, 18 del, 83 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_near_room1 -%WER 3.45 [ 140 / 4058, 14 ins, 24 del, 102 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.5_near_room2 -%WER 3.93 [ 159 / 4045, 23 ins, 26 del, 110 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.0_near_room3 - -exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* -%WER 19.04 [ 564 / 2962, 53 ins, 94 del, 417 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_far_room1 -%WER 18.17 [ 569 / 3131, 48 ins, 123 del, 398 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_0.5_near_room1 -%WER 3.72 [ 220 / 5907, 24 ins, 41 del, 155 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room1 -%WER 7.40 [ 461 / 6226, 41 ins, 97 del, 323 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room2 -%WER 7.26 [ 426 / 5868, 33 ins, 101 del, 292 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 -%WER 3.18 [ 188 / 
5907, 25 ins, 38 del, 125 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room1 -%WER 4.87 [ 303 / 6226, 31 ins, 64 del, 208 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 -%WER 4.84 [ 284 / 5868, 23 ins, 64 del, 197 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_10_1.0_near_room3 - -######################################## -GMM RESULTs with WPE and Beamformit: -exp/tri3/decode_dt_real_1ch -%WER 27.48 [ 402 / 1463, 39 ins, 63 del, 300 sub ] exp/tri3/decode_dt_real_1ch/wer_14_0.5_far_room1 -%WER 22.33 [ 358 / 1603, 37 ins, 90 del, 231 sub ] exp/tri3/decode_dt_real_1ch/wer_16_0.0_near_room1 - -exp/tri3/decode_dt_simu_1ch -%WER 4.47 [ 182 / 4071, 35 ins, 26 del, 121 sub ] exp/tri3/decode_dt_simu_1ch/wer_14_0.0_far_room1 -%WER 6.78 [ 275 / 4058, 41 ins, 37 del, 197 sub ] exp/tri3/decode_dt_simu_1ch/wer_17_0.0_far_room2 -%WER 5.27 [ 213 / 4045, 27 ins, 32 del, 154 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_1.0_far_room3 -%WER 4.54 [ 185 / 4071, 34 ins, 24 del, 127 sub ] exp/tri3/decode_dt_simu_1ch/wer_15_0.0_near_room1 -%WER 5.54 [ 225 / 4058, 32 ins, 27 del, 166 sub ] exp/tri3/decode_dt_simu_1ch/wer_15_0.5_near_room2 -%WER 5.59 [ 226 / 4045, 29 ins, 37 del, 160 sub ] exp/tri3/decode_dt_simu_1ch/wer_14_0.5_near_room3 - -exp/tri3/decode_et_real_1ch -%WER 20.97 [ 621 / 2962, 113 ins, 79 del, 429 sub ] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 -%WER 21.65 [ 678 / 3131, 77 ins, 138 del, 463 sub ] exp/tri3/decode_et_real_1ch/wer_16_1.0_near_room1 - -exp/tri3/decode_et_simu_1ch -%WER 5.54 [ 327 / 5907, 65 ins, 40 del, 222 sub ] exp/tri3/decode_et_simu_1ch/wer_16_0.5_far_room1 -%WER 7.48 [ 466 / 6226, 64 ins, 61 del, 341 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room2 -%WER 7.11 [ 417 / 5868, 64 ins, 64 del, 289 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.5_far_room3 -%WER 5.50 [ 325 / 5907, 58 ins, 44 del, 223 sub ] exp/tri3/decode_et_simu_1ch/wer_17_1.0_near_room1 
-%WER 6.23 [ 388 / 6226, 60 ins, 52 del, 276 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_near_room2 -%WER 7.09 [ 416 / 5868, 67 ins, 60 del, 289 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.5_near_room3 +%WER 7.55 [ 446 / 5907, 79 ins, 55 del, 312 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room1 +%WER 18.36 [ 1143 / 6226, 109 ins, 209 del, 825 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.5_far_room2 +%WER 20.60 [ 1209 / 5868, 140 ins, 244 del, 825 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.0_far_room3 +%WER 6.97 [ 412 / 5907, 70 ins, 44 del, 298 sub ] exp/tri3/decode_et_simu_1ch/wer_16_1.0_near_room1 +%WER 9.46 [ 589 / 6226, 54 ins, 116 del, 419 sub ] exp/tri3/decode_et_simu_1ch/wer_15_1.0_near_room2 +%WER 10.62 [ 623 / 5868, 81 ins, 105 del, 437 sub ] exp/tri3/decode_et_simu_1ch/wer_12_1.0_near_room3 + +exp/tri3/decode_dt_real_1ch_wpe +%WER 33.83 [ 495 / 1463, 42 ins, 97 del, 356 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_17_0.0_far_room1 +%WER 27.76 [ 445 / 1603, 34 ins, 110 del, 301 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_16_0.0_near_room1 + +exp/tri3/decode_dt_simu_1ch_wpe +%WER 6.61 [ 269 / 4071, 32 ins, 50 del, 187 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_1.0_far_room1 +%WER 17.32 [ 703 / 4058, 74 ins, 123 del, 506 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_16_0.0_far_room2 +%WER 18.34 [ 742 / 4045, 87 ins, 143 del, 512 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_0.0_far_room3 +%WER 5.50 [ 224 / 4071, 35 ins, 43 del, 146 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_16_1.0_near_room1 +%WER 7.20 [ 292 / 4058, 41 ins, 46 del, 205 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_12_1.0_near_room2 +%WER 7.69 [ 311 / 4045, 30 ins, 59 del, 222 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_1.0_near_room3 + +exp/tri3/decode_et_real_1ch_wpe +%WER 30.25 [ 896 / 2962, 84 ins, 178 del, 634 sub ] exp/tri3/decode_et_real_1ch_wpe/wer_16_0.5_far_room1 +%WER 31.46 [ 985 / 3131, 111 ins, 157 del, 717 sub ] exp/tri3/decode_et_real_1ch_wpe/wer_16_0.0_near_room1 + 
+exp/tri3/decode_et_simu_1ch_wpe +%WER 7.23 [ 427 / 5907, 71 ins, 51 del, 305 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_16_0.5_far_room1 +%WER 16.86 [ 1050 / 6226, 75 ins, 243 del, 732 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_14_1.0_far_room2 +%WER 19.90 [ 1168 / 5868, 134 ins, 250 del, 784 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_13_0.5_far_room3 +%WER 7.13 [ 421 / 5907, 76 ins, 39 del, 306 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_17_0.5_near_room1 +%WER 8.54 [ 532 / 6226, 55 ins, 96 del, 381 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_17_0.5_near_room2 +%WER 10.07 [ 591 / 5868, 94 ins, 80 del, 417 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_12_0.5_near_room3 + +exp/tri3/decode_dt_real_2ch_wpe +%WER 31.51 [ 461 / 1463, 39 ins, 87 del, 335 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_17_0.0_far_room1 +%WER 26.51 [ 425 / 1603, 31 ins, 105 del, 289 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_17_0.0_near_room1 + +exp/tri3/decode_dt_simu_2ch_wpe +%WER 6.24 [ 254 / 4071, 51 ins, 34 del, 169 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.0_far_room1 +%WER 14.32 [ 581 / 4058, 78 ins, 98 del, 405 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.0_far_room2 +%WER 17.33 [ 701 / 4045, 87 ins, 154 del, 460 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_14_1.0_far_room3 +%WER 5.75 [ 234 / 4071, 35 ins, 40 del, 159 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_17_1.0_near_room1 +%WER 7.07 [ 287 / 4058, 40 ins, 41 del, 206 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_1.0_near_room2 +%WER 7.54 [ 305 / 4045, 41 ins, 45 del, 219 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.5_near_room3 + +exp/tri3/decode_et_real_2ch_wpe +%WER 26.81 [ 794 / 2962, 113 ins, 116 del, 565 sub ] exp/tri3/decode_et_real_2ch_wpe/wer_16_0.0_far_room1 +%WER 28.11 [ 880 / 3131, 80 ins, 189 del, 611 sub ] exp/tri3/decode_et_real_2ch_wpe/wer_17_1.0_near_room1 + +exp/tri3/decode_et_simu_2ch_wpe +%WER 7.03 [ 415 / 5907, 73 ins, 45 del, 297 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_16_0.5_far_room1 +%WER 14.63 [ 911 / 6226, 93 
ins, 161 del, 657 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_15_0.5_far_room2 +%WER 18.58 [ 1090 / 5868, 128 ins, 213 del, 749 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_14_0.5_far_room3 +%WER 7.06 [ 417 / 5907, 68 ins, 47 del, 302 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_17_1.0_near_room1 +%WER 8.75 [ 545 / 6226, 77 ins, 64 del, 404 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_13_0.5_near_room2 +%WER 9.08 [ 533 / 5868, 86 ins, 82 del, 365 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_12_1.0_near_room3 + +exp/tri3/decode_dt_real_8ch_wpe +%WER 27.41 [ 401 / 1463, 44 ins, 71 del, 286 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_15_1.0_far_room1 +%WER 23.96 [ 384 / 1603, 42 ins, 83 del, 259 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_15_1.0_near_room1 + +exp/tri3/decode_dt_simu_8ch_wpe +%WER 6.31 [ 257 / 4071, 43 ins, 46 del, 168 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_far_room1 +%WER 8.92 [ 362 / 4058, 63 ins, 56 del, 243 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_12_1.0_far_room2 +%WER 9.67 [ 391 / 4045, 131 ins, 42 del, 218 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_14_1.0_far_room3 +%WER 6.07 [ 247 / 4071, 39 ins, 39 del, 169 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_near_room1 +%WER 7.22 [ 293 / 4058, 60 ins, 32 del, 201 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_13_0.5_near_room2 +%WER 6.23 [ 252 / 4045, 54 ins, 25 del, 173 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_14_0.0_near_room3 + +exp/tri3/decode_et_real_8ch_wpe +%WER 22.01 [ 652 / 2962, 118 ins, 87 del, 447 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_17_0.5_far_room1 +%WER 23.95 [ 750 / 3131, 134 ins, 125 del, 491 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_16_1.0_near_room1 + +exp/tri3/decode_et_simu_8ch_wpe +%WER 7.16 [ 423 / 5907, 89 ins, 44 del, 290 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_15_0.5_far_room1 +%WER 9.73 [ 606 / 6226, 87 ins, 90 del, 429 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_14_1.0_far_room2 +%WER 9.22 [ 541 / 5868, 105 ins, 77 del, 359 sub ] 
exp/tri3/decode_et_simu_8ch_wpe/wer_15_1.0_far_room3 +%WER 7.08 [ 418 / 5907, 70 ins, 45 del, 303 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_17_1.0_near_room1 +%WER 7.98 [ 497 / 6226, 61 ins, 80 del, 356 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_15_1.0_near_room2 +%WER 7.91 [ 464 / 5868, 83 ins, 67 del, 314 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_16_1.0_near_room3 exp/tri3/decode_dt_real_2ch_beamformit -%WER 25.91 [ 379 / 1463, 32 ins, 72 del, 275 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_16_1.0_far_room1 -%WER 21.46 [ 344 / 1603, 36 ins, 87 del, 221 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_0.0_near_room1 +%WER 28.43 [ 416 / 1463, 32 ins, 82 del, 302 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_1.0_far_room1 +%WER 23.46 [ 376 / 1603, 26 ins, 98 del, 252 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_1.0_near_room1 exp/tri3/decode_dt_simu_2ch_beamformit -%WER 4.91 [ 200 / 4071, 42 ins, 26 del, 132 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_17_0.0_far_room1 -%WER 6.38 [ 259 / 4058, 35 ins, 38 del, 186 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_15_1.0_far_room2 -%WER 5.12 [ 207 / 4045, 34 ins, 29 del, 144 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_13_1.0_far_room3 -%WER 5.21 [ 212 / 4071, 56 ins, 27 del, 129 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_16_0.0_near_room1 -%WER 5.32 [ 216 / 4058, 30 ins, 33 del, 153 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_17_1.0_near_room2 -%WER 5.07 [ 205 / 4045, 35 ins, 26 del, 144 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_14_0.0_near_room3 +%WER 6.46 [ 263 / 4071, 57 ins, 36 del, 170 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_14_0.5_far_room1 +%WER 11.80 [ 479 / 4058, 59 ins, 87 del, 333 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_10_1.0_far_room2 +%WER 13.75 [ 556 / 4045, 97 ins, 90 del, 369 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_11_0.5_far_room3 +%WER 6.29 [ 256 / 4071, 49 ins, 41 del, 166 sub ] 
exp/tri3/decode_dt_simu_2ch_beamformit/wer_17_1.0_near_room1 +%WER 6.36 [ 258 / 4058, 42 ins, 33 del, 183 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_12_1.0_near_room2 +%WER 6.67 [ 270 / 4045, 42 ins, 45 del, 183 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_12_1.0_near_room3 exp/tri3/decode_et_real_2ch_beamformit -%WER 17.45 [ 517 / 2962, 74 ins, 91 del, 352 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_17_0.5_far_room1 -%WER 18.78 [ 588 / 3131, 88 ins, 99 del, 401 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_16_0.5_near_room1 +%WER 25.05 [ 742 / 2962, 75 ins, 159 del, 508 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_16_1.0_far_room1 +%WER 23.83 [ 746 / 3131, 87 ins, 146 del, 513 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_17_1.0_near_room1 exp/tri3/decode_et_simu_2ch_beamformit -%WER 5.43 [ 321 / 5907, 63 ins, 44 del, 214 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_far_room1 -%WER 6.75 [ 420 / 6226, 50 ins, 62 del, 308 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_14_1.0_far_room2 -%WER 6.87 [ 403 / 5868, 75 ins, 47 del, 281 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_13_0.0_far_room3 -%WER 5.59 [ 330 / 5907, 70 ins, 46 del, 214 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room1 -%WER 5.89 [ 367 / 6226, 45 ins, 62 del, 260 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room2 -%WER 6.61 [ 388 / 5868, 55 ins, 67 del, 266 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_0.5_near_room3 +%WER 6.97 [ 412 / 5907, 71 ins, 38 del, 303 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_16_0.5_far_room1 +%WER 12.50 [ 778 / 6226, 104 ins, 104 del, 570 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_12_0.0_far_room2 +%WER 15.59 [ 915 / 5868, 134 ins, 153 del, 628 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_13_0.0_far_room3 +%WER 7.36 [ 435 / 5907, 80 ins, 48 del, 307 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room1 +%WER 7.73 [ 481 / 6226, 52 ins, 81 del, 348 sub ] 
exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room2 +%WER 8.64 [ 507 / 5868, 78 ins, 77 del, 352 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_13_1.0_near_room3 exp/tri3/decode_dt_real_8ch_beamformit -%WER 19.75 [ 289 / 1463, 41 ins, 38 del, 210 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_15_1.0_far_room1 -%WER 15.41 [ 247 / 1603, 27 ins, 47 del, 173 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_15_1.0_near_room1 +%WER 20.71 [ 303 / 1463, 41 ins, 43 del, 219 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_14_1.0_far_room1 +%WER 17.28 [ 277 / 1603, 24 ins, 57 del, 196 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_17_1.0_near_room1 exp/tri3/decode_dt_simu_8ch_beamformit -%WER 4.77 [ 194 / 4071, 39 ins, 27 del, 128 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_17_0.5_far_room1 -%WER 5.45 [ 221 / 4058, 46 ins, 24 del, 151 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_16_0.0_far_room2 -%WER 4.20 [ 170 / 4045, 25 ins, 26 del, 119 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_13_1.0_far_room3 -%WER 4.91 [ 200 / 4071, 55 ins, 20 del, 125 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_0.0_near_room1 -%WER 5.22 [ 212 / 4058, 33 ins, 26 del, 153 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_15_1.0_near_room2 -%WER 4.08 [ 165 / 4045, 28 ins, 20 del, 117 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_13_1.0_near_room3 +%WER 6.17 [ 251 / 4071, 41 ins, 43 del, 167 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_17_1.0_far_room1 +%WER 7.20 [ 292 / 4058, 58 ins, 44 del, 190 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_17_0.5_far_room2 +%WER 5.41 [ 219 / 4045, 43 ins, 27 del, 149 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_0.5_far_room3 +%WER 6.12 [ 249 / 4071, 51 ins, 37 del, 161 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_15_1.0_near_room1 +%WER 7.32 [ 297 / 4058, 105 ins, 27 del, 165 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_13_1.0_near_room2 +%WER 5.34 [ 216 / 4045, 48 ins, 31 del, 137 sub ] 
exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_1.0_near_room3 exp/tri3/decode_et_real_8ch_beamformit -%WER 14.38 [ 426 / 2962, 72 ins, 71 del, 283 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_15_1.0_far_room1 -%WER 13.86 [ 434 / 3131, 68 ins, 70 del, 296 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_17_1.0_near_room1 +%WER 16.91 [ 501 / 2962, 93 ins, 71 del, 337 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_15_1.0_far_room1 +%WER 15.59 [ 488 / 3131, 86 ins, 74 del, 328 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_17_1.0_near_room1 exp/tri3/decode_et_simu_8ch_beamformit -%WER 5.59 [ 330 / 5907, 87 ins, 36 del, 207 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_far_room1 -%WER 6.10 [ 380 / 6226, 51 ins, 55 del, 274 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_15_1.0_far_room2 -%WER 6.24 [ 366 / 5868, 57 ins, 51 del, 258 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_far_room3 -%WER 5.62 [ 332 / 5907, 74 ins, 44 del, 214 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_16_1.0_near_room1 -%WER 5.73 [ 357 / 6226, 56 ins, 57 del, 244 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room2 -%WER 5.93 [ 348 / 5868, 62 ins, 52 del, 234 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_near_room3 +%WER 6.57 [ 388 / 5907, 71 ins, 49 del, 268 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_15_1.0_far_room1 +%WER 7.60 [ 473 / 6226, 81 ins, 65 del, 327 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_15_0.5_far_room2 +%WER 7.26 [ 426 / 5868, 64 ins, 67 del, 295 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_far_room3 +%WER 7.11 [ 420 / 5907, 77 ins, 53 del, 290 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room1 +%WER 7.07 [ 440 / 6226, 78 ins, 66 del, 296 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room2 +%WER 7.00 [ 411 / 5868, 83 ins, 59 del, 269 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_near_room3 + +exp/tri3/decode_et_cln +%WER 6.74 [ 1213 / 18001, 234 ins, 158 del, 
821 sub ] exp/tri3/decode_et_cln/wer_15_1.0_cln_room ######################################## +TDNN RESULTs: +exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* +%WER 20.44 [ 299 / 1463, 22 ins, 75 del, 202 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.5_far_room1 +%WER 18.59 [ 298 / 1603, 16 ins, 79 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_13_0.0_near_room1 +%WER 17.91 [ 262 / 1463, 24 ins, 59 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_8_0.5_far_room1 +%WER 16.16 [ 259 / 1603, 16 ins, 64 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_11_0.0_near_room1 +%WER 16.13 [ 236 / 1463, 21 ins, 66 del, 149 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_10_1.0_far_room1 +%WER 11.92 [ 191 / 1603, 16 ins, 37 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_12_0.0_near_room1 +%WER 18.25 [ 267 / 1463, 21 ins, 70 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_9_0.5_far_room1 +%WER 14.60 [ 234 / 1603, 14 ins, 51 del, 169 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_10_0.0_near_room1 +%WER 12.24 [ 179 / 1463, 10 ins, 51 del, 118 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_1.0_far_room1 +%WER 9.61 [ 154 / 1603, 15 ins, 30 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_0.0_near_room1 +%WER 16.20 [ 237 / 1463, 19 ins, 80 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_11_0.5_far_room1 +%WER 12.98 [ 208 / 1603, 20 ins, 54 del, 134 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_10_0.0_near_room1 +%WER 3.19 [ 130 / 4071, 16 ins, 27 del, 87 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room1 +%WER 7.29 [ 
296 / 4058, 35 ins, 51 del, 210 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 +%WER 7.17 [ 290 / 4045, 31 ins, 57 del, 202 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.0_far_room3 +%WER 3.00 [ 122 / 4071, 14 ins, 22 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.0_near_room1 +%WER 3.43 [ 139 / 4058, 12 ins, 21 del, 106 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_1.0_near_room2 +%WER 3.86 [ 156 / 4045, 19 ins, 28 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.0_near_room3 +%WER 3.12 [ 127 / 4071, 20 ins, 19 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.0_far_room1 +%WER 6.73 [ 273 / 4058, 33 ins, 46 del, 194 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.5_far_room2 +%WER 6.50 [ 263 / 4045, 34 ins, 47 del, 182 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_9_0.0_far_room3 +%WER 3.00 [ 122 / 4071, 15 ins, 13 del, 94 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.0_near_room1 +%WER 3.25 [ 132 / 4058, 21 ins, 15 del, 96 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.0_near_room2 +%WER 3.78 [ 153 / 4045, 23 ins, 24 del, 106 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.0_near_room3 +%WER 3.10 [ 126 / 4071, 22 ins, 18 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.0_far_room1 +%WER 4.44 [ 180 / 4058, 16 ins, 36 del, 128 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_1.0_far_room2 +%WER 4.70 [ 190 / 4045, 30 ins, 26 del, 134 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_0.0_far_room3 +%WER 2.82 [ 115 / 4071, 12 ins, 18 del, 85 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.0_near_room1 +%WER 2.88 [ 117 / 4058, 8 ins, 26 del, 83 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_1.0_near_room2 +%WER 3.39 [ 137 / 4045, 25 ins, 20 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_7_0.0_near_room3 +%WER 3.00 [ 122 / 4071, 19 ins, 17 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_far_room1 +%WER 5.40 [ 219 / 4058, 26 ins, 38 del, 155 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_7_1.0_far_room2 +%WER 5.86 [ 237 / 4045, 22 ins, 44 del, 171 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_9_0.5_far_room3 +%WER 2.97 [ 121 / 4071, 13 ins, 16 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_near_room1 +%WER 3.40 [ 138 / 4058, 20 ins, 20 del, 98 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_near_room2 +%WER 3.76 [ 152 / 4045, 14 ins, 30 del, 108 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_1.0_near_room3 +%WER 3.00 [ 122 / 4071, 13 ins, 22 del, 87 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.5_far_room1 +%WER 3.03 [ 123 / 4058, 21 ins, 14 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.0_far_room2 +%WER 2.94 [ 119 / 4045, 12 ins, 23 del, 84 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_1.0_far_room3 +%WER 2.95 [ 120 / 4071, 15 ins, 14 del, 91 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.0_near_room1 +%WER 2.64 [ 107 / 4058, 14 ins, 18 del, 75 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_0.0_near_room2 +%WER 2.84 [ 115 / 4045, 13 ins, 27 del, 75 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_9_1.0_near_room3 +%WER 2.92 [ 119 / 4071, 14 ins, 21 del, 84 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.0_far_room1 +%WER 3.97 [ 161 / 4058, 14 ins, 38 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_1.0_far_room2 +%WER 3.44 [ 139 / 4045, 14 ins, 21 del, 104 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_0.5_far_room3 +%WER 2.92 [ 119 / 4071, 13 ins, 18 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.0_near_room1 +%WER 3.30 [ 134 / 4058, 13 ins, 29 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.5_near_room2 +%WER 3.36 [ 136 / 4045, 15 ins, 27 del, 94 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.5_near_room3 + +exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* +%WER 3.55 [ 639 / 18001, 77 ins, 125 del, 437 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_cln/wer_9_1.0_cln_room +%WER 19.85 [ 588 / 2962, 52 ins, 114 del, 422 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_12_0.0_far_room1 +%WER 18.24 [ 571 / 3131, 39 ins, 159 del, 373 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_1.0_near_room1 +%WER 18.10 [ 536 / 2962, 38 ins, 116 del, 382 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_11_0.5_far_room1 +%WER 15.81 [ 495 / 3131, 61 ins, 96 del, 338 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_10_0.0_near_room1 +%WER 14.21 [ 421 / 2962, 54 ins, 51 del, 316 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.0_far_room1 +%WER 11.27 [ 353 / 3131, 39 ins, 80 del, 234 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.5_near_room1 +%WER 15.94 [ 472 / 2962, 44 ins, 101 del, 327 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_11_0.5_far_room1 +%WER 14.53 [ 455 / 3131, 52 ins, 97 del, 306 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_10_0.5_near_room1 +%WER 10.23 [ 303 / 2962, 41 ins, 44 del, 218 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_0.5_far_room1 +%WER 9.17 [ 287 / 3131, 35 ins, 61 del, 191 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_1.0_near_room1 +%WER 12.90 [ 382 / 2962, 30 ins, 77 del, 275 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_10_1.0_far_room1 +%WER 11.75 [ 368 / 3131, 58 ins, 65 del, 245 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_10_0.0_near_room1 +%WER 3.74 [ 221 / 5907, 24 ins, 41 del, 156 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room1 +%WER 7.66 [ 477 / 6226, 43 ins, 100 del, 334 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_0.5_far_room2 +%WER 7.72 [ 453 / 5868, 33 ins, 114 del, 306 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 +%WER 3.28 [ 194 / 5907, 20 ins, 38 del, 136 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room1 +%WER 4.75 [ 296 / 6226, 27 ins, 66 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 +%WER 4.91 [ 288 / 5868, 31 ins, 56 del, 201 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room3 +%WER 3.69 [ 218 / 5907, 29 ins, 36 del, 153 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_0.5_far_room1 +%WER 7.04 [ 438 / 6226, 48 ins, 77 del, 313 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_7_0.5_far_room2 +%WER 7.17 [ 421 / 5868, 37 ins, 94 del, 290 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_1.0_far_room3 +%WER 3.22 [ 190 / 5907, 27 ins, 31 del, 132 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_10_0.0_near_room1 +%WER 4.72 [ 294 / 6226, 29 ins, 64 del, 201 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_9_1.0_near_room2 +%WER 4.87 [ 286 / 5868, 33 ins, 50 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_7_1.0_near_room3 +%WER 3.28 [ 194 / 5907, 20 ins, 36 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room1 +%WER 5.22 [ 325 / 6226, 36 ins, 72 del, 217 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room2 +%WER 6.00 [ 352 / 5868, 39 ins, 70 del, 243 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room3 +%WER 3.20 [ 189 / 5907, 28 ins, 29 del, 132 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_10_0.0_near_room1 +%WER 4.18 [ 260 / 6226, 24 ins, 60 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_near_room2 +%WER 4.26 [ 250 / 5868, 38 ins, 45 del, 167 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_8_0.5_near_room3 +%WER 3.39 [ 200 / 5907, 27 ins, 35 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_8_1.0_far_room1 +%WER 6.12 [ 381 / 6226, 28 ins, 94 del, 259 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_11_1.0_far_room2 +%WER 6.58 [ 386 / 5868, 39 ins, 77 del, 270 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_far_room3 +%WER 3.20 [ 189 / 5907, 29 ins, 30 del, 130 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_10_0.0_near_room1 +%WER 4.53 [ 282 / 6226, 29 ins, 61 del, 192 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_near_room2 +%WER 4.48 [ 263 / 5868, 26 ins, 49 del, 188 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_near_room3 +%WER 3.54 [ 209 / 5907, 27 ins, 36 del, 146 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_0.5_far_room1 +%WER 4.11 [ 256 / 6226, 29 ins, 51 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_far_room2 +%WER 3.89 [ 228 / 5868, 28 ins, 43 del, 157 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_far_room3 +%WER 3.22 [ 190 / 5907, 24 ins, 29 del, 137 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_0.5_near_room1 +%WER 3.71 [ 231 / 6226, 24 ins, 55 del, 152 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_1.0_near_room2 +%WER 3.66 [ 215 / 5868, 22 ins, 46 del, 147 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_near_room3 +%WER 3.50 [ 207 / 5907, 19 ins, 42 del, 146 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_9_1.0_far_room1 +%WER 5.08 [ 316 / 6226, 34 ins, 59 del, 223 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_7_1.0_far_room2 +%WER 4.46 [ 262 / 5868, 33 ins, 48 del, 181 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_0.5_far_room3 +%WER 3.35 [ 198 / 5907, 16 ins, 41 del, 141 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_1.0_near_room1 +%WER 4.42 [ 275 / 6226, 27 ins, 56 del, 192 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_8_1.0_near_room2 +%WER 3.92 [ 230 / 5868, 37 ins, 36 del, 157 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_9_0.0_near_room3 diff --git a/egs/reverb/s5/local/compute_se_scores.sh 
b/egs/reverb/s5/local/compute_se_scores.sh new file mode 100755 index 00000000000..d65fbbca2f4 --- /dev/null +++ b/egs/reverb/s5/local/compute_se_scores.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# Apache 2.0 + +# This script computes the dereverberation scores given in REVERB challenge +# Eg. local/compute_se_scores.sh --nch 8 /export/corpora5/REVERB_2014/REVERB ${PWD}/wav ${PWD}/local + +. ./cmd.sh +. ./path.sh +set -e +set -u +set -o pipefail + +cmd=run.pl +nch=8 + +. utils/parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: local/compute_se.sh [options] " + echo "options" + echo " --cmd # Command to run in parallel with" + echo " --nch # nch of WPE to use for computing SE scores" + exit 1; +fi + +reverb_data=$1 +enhancement_directory=$2 +pesqdir=$3 +enhancement_directory_sim=$enhancement_directory/WPE/${nch}ch/REVERB_WSJCAM0_dt/data/ +enhancement_directory_real=$enhancement_directory/WPE/${nch}ch/MC_WSJ_AV_Dev/ + +expdir=${PWD}/exp/compute_se_${nch}ch +pushd local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/evaltools +$cmd $expdir/compute_se_real.log matlab -nodisplay -nosplash -r "addpath('SRMRToolbox'); score_RealData('$reverb_data','$enhancement_directory_real');exit" +$cmd $expdir/compute_se_sim.log matlab -nodisplay -nosplash -r "addpath('SRMRToolbox'); score_SimData('$reverb_data','$enhancement_directory_sim','$pesqdir');exit" +popd +mv local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/scores $expdir/ diff --git a/egs/reverb/s5/local/download_se_eval_tool.sh b/egs/reverb/s5/local/download_se_eval_tool.sh new file mode 100755 index 00000000000..c7b272907b6 --- /dev/null +++ b/egs/reverb/s5/local/download_se_eval_tool.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# This script downloads the official REVERB challenge SE scripts and SRMR toolbox +# This 
script also downloads and compiles PESQ +# please make sure that you or your institution have the license to report PESQ +# Apache 2.0 + +wget 'https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-P.862-200102-I!!SOFT-ZST-E&type=items' -O PESQ.zip +unzip PESQ.zip -d local/PESQ_sources +rm PESQ.zip +cd local/PESQ_sources/P862/Software/source +gcc *.c -lm -o PESQ +cd ../../../../../ +mv local/PESQ_sources/P862/Software/source/PESQ local/ + +wget 'https://reverb2014.dereverberation.com/tools/REVERB-SPEENHA.Release04Oct.zip' -O REVERB_scores.zip +unzip REVERB_scores.zip -d local/REVERB_scores_source +rm REVERB_scores.zip + +pushd local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/evaltools +sed -i 's/wavread/audioread/g' prog/score_sim.m +git clone https://github.com/MuSAELab/SRMRToolbox.git +sed -i 's/wavread/audioread/g' SRMRToolbox/libs/preprocess.m +sed -i 's/SRMR_main/SRMR/g' prog/score_real.m +sed -i 's/SRMR_main/SRMR/g' prog/score_sim.m +sed -i 's/+wb\ //g' prog/calcpesq.m +sed -i 's/pesq_/_pesq_/g' prog/calcpesq.m +sed -ie '30d;31d' prog/calcpesq.m +patch score_RealData.m -i ../../../score_RealData.patch -o score_RealData_new.m +mv score_RealData_new.m score_RealData.m +patch score_SimData.m -i ../../../score_SimData.patch -o score_SimData_new.m +mv score_SimData_new.m score_SimData.m +popd diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh index e1fca60a2dd..8867961dcdd 100755 --- a/egs/reverb/s5/local/get_results.sh +++ b/egs/reverb/s5/local/get_results.sh @@ -15,6 +15,18 @@ echo "" echo "exp/tri3/decode_et_simu_1ch" cat exp/tri3/decode_et_simu_1ch/scoring_kaldi/best_wer* echo "" +echo "exp/tri3/decode_dt_real_1ch_wpe" +cat exp/tri3/decode_dt_real_1ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_dt_simu_1ch_wpe" +cat exp/tri3/decode_dt_simu_1ch_wpe/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_real_1ch_wpe" +cat exp/tri3/decode_et_real_1ch_wpe/scoring_kaldi/best_wer* +echo "" +echo 
"exp/tri3/decode_et_simu_1ch_wpe" +cat exp/tri3/decode_et_simu_1ch_wpe/scoring_kaldi/best_wer* +echo "" echo "exp/tri3/decode_dt_real_2ch_wpe" cat exp/tri3/decode_dt_real_2ch_wpe/scoring_kaldi/best_wer* echo "" @@ -60,8 +72,11 @@ echo "" echo "exp/tri3/decode_et_real_8ch_beamformit" cat exp/tri3/decode_et_real_8ch_beamformit/scoring_kaldi/best_wer* echo "" -echo "exp/tri3/decode_et_simu_8ch_beamformit" -cat exp/tri3/decode_et_simu_8ch_beamformit/scoring_kaldi/best_wer* +echo "exp/tri3/decode_dt_cln" +cat exp/tri3/decode_dt_cln/scoring_kaldi/best_wer* +echo "" +echo "exp/tri3/decode_et_cln" +cat exp/tri3/decode_et_cln/scoring_kaldi/best_wer* echo "########################################" echo "TDNN RESULTs:" echo "exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt*" diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh index cfdc226dd1e..8757021ddd7 100755 --- a/egs/reverb/s5/local/prepare_simu_data.sh +++ b/egs/reverb/s5/local/prepare_simu_data.sh @@ -54,6 +54,12 @@ for nch in 1 2 8; do perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${reverb}/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch_wav.scp + if [ ${nch} == 1 ]; then + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e cln`; do + perl -se 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, " ", $dir, $_, "\n"; } }' -- -dir=${reverb}/REVERB_WSJCAM0_${task}/data ${taskdir}/$x |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_cln_wav.scp + fi done task=tr @@ -83,6 +89,14 @@ for nch in 1 2 8; do sed -e "s/^\(...\)/\1_${x}_\1/" done > ${dir}/${task}_simu_${nch}ch.trans1 || exit 1; cat ${dir}/${task}_simu_${nch}ch.trans1 | local/normalize_transcript.pl ${noiseword} > ${dir}/${task}_simu_${nch}ch.txt || exit 1; + if [ ${nch} == 1 ]; then + for x in `ls ${taskdir} | grep SimData | grep _${task}_ | grep -e cln`; do + perl 
-e 'while (<>) { chomp; if (m/\/(\w{8})[^\/]+$/) { print $1, "\n"; } }' ${taskdir}/$x |\ + perl local/find_transcripts_singledot.pl ${dir}/${task}.dot |\ + sed -e "s/^\(...\)/\1_${x}_\1/" + done > ${dir}/${task}_cln.trans1 || exit 1; + cat ${dir}/${task}_cln.trans1 | local/normalize_transcript.pl ${noiseword} > ${dir}/${task}_cln.txt || exit 1; + fi done # Make the utt2spk and spk2utt files. @@ -90,6 +104,10 @@ for nch in 1 2 8; do cat ${dir}/${task}_simu_${nch}ch_wav.scp | awk '{print $1}' | awk -F '_' '{print $0 " " $1}' > ${dir}/${task}_simu_${nch}ch.utt2spk || exit 1; cat ${dir}/${task}_simu_${nch}ch.utt2spk | ./utils/utt2spk_to_spk2utt.pl > ${dir}/${task}_simu_${nch}ch.spk2utt || exit 1; done + for task in dt et; do + cat ${dir}/${task}_cln_wav.scp | awk '{print $1}' | awk -F '_' '{print $0 " " $1}' > ${dir}/${task}_cln.utt2spk || exit 1; + cat ${dir}/${task}_cln.utt2spk | ./utils/utt2spk_to_spk2utt.pl > ${dir}/${task}_cln.spk2utt || exit 1; + done done # finally copy the above files to the data directory @@ -118,6 +136,14 @@ for nch in 1 2 8; do sort ${dir}/${task}_simu_1ch.utt2spk > ${datadir}/utt2spk sort ${dir}/${task}_simu_1ch.spk2utt > ${datadir}/spk2utt ./utils/fix_data_dir.sh ${datadir} + else + datadir=data/${task}_cln + mkdir -p ${datadir} + sort ${dir}/${task}_cln_wav.scp > ${datadir}/wav.scp + sort ${dir}/${task}_cln.txt > ${datadir}/text + sort ${dir}/${task}_cln.utt2spk > ${datadir}/utt2spk + sort ${dir}/${task}_cln.spk2utt > ${datadir}/spk2utt + ./utils/fix_data_dir.sh ${datadir} fi fi done diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh index 00adb14d854..e3dc606776b 100755 --- a/egs/reverb/s5/local/score.sh +++ b/egs/reverb/s5/local/score.sh @@ -63,6 +63,9 @@ mkdir -p $dir/scoring_kaldi if echo $data | grep -q "real"; then tasks="\ near_room1 far_room1" +elif echo $data | grep -q "cln"; then + tasks="\ + cln_room" else tasks="\ near_room1 far_room1 \ diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 
3cc3efca9e7..89f77e3e01a 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -36,6 +36,10 @@ fi . ./path.sh stage=0 +nch_se=8 +# flag for turning on computation of dereverberation measures +# please make sure that you or your institution have the license to report PESQ before turning on the flag +compute_se=false . utils/parse_options.sh # Set bash to 'debug' mode, it prints the commands (option '-x') and exits on : @@ -57,7 +61,7 @@ fi #training set and test set train_set=tr_simu_8ch -test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe dt_real_2ch_wpe dt_simu_2ch_wpe et_real_2ch_wpe et_simu_2ch_wpe dt_real_8ch_wpe dt_simu_8ch_wpe et_real_8ch_wpe et_simu_8ch_wpe" +test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe dt_real_2ch_wpe dt_simu_2ch_wpe et_real_2ch_wpe et_simu_2ch_wpe dt_real_8ch_wpe dt_simu_8ch_wpe et_real_8ch_wpe et_simu_8ch_wpe dt_cln et_cln" # The language models with which to decode (tg_5k or bg_5k) lm="tg_5k" @@ -68,6 +72,7 @@ nj=92 decode_nj=10 wavdir=${PWD}/wav +pesqdir=${PWD}/local if [ ${stage} -le 1 ]; then # data preparation echo "stage 0: Data preparation" @@ -81,7 +86,18 @@ if [ $stage -le 2 ]; then local/run_beamform.sh ${wavdir}/WPE/ fi -if [ $stage -le 3 ]; then +# Compute dereverberation scores +if [ $stage -le 3 ] && $compute_se; then + if [ ! -d local/REVERB_scores_source ] || [ ! -d local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/evaltools/SRMRToolbox ] || [ !
-f local/PESQ ]; then + # download and install speech enhancement evaluation tools + local/download_se_eval_tool.sh + fi + local/compute_se_scores.sh --nch $nch_se $reverb $wavdir $pesqdir + cat exp/compute_se_${nch_se}ch/scores/score_SimData + cat exp/compute_se_${nch_se}ch/scores/score_RealData +fi + +if [ $stage -le 4 ]; then # Prepare wsjcam0 clean data and wsj0 language model. local/wsjcam0_data_prep.sh $wsjcam0 $wsj0 @@ -105,14 +121,14 @@ if [ $stage -le 3 ]; then data/lang $LM data/local/dict/lexicon.txt data/lang fi -if [ $stage -le 4 ]; then +if [ $stage -le 5 ]; then for dset in ${train_set} ${test_sets}; do utils/copy_data_dir.sh data/${dset} data/${dset}_nosplit utils/data/modify_speaker_info.sh --seconds-per-spk-max 180 data/${dset}_nosplit data/${dset} done fi -if [ $stage -le 5 ]; then +if [ $stage -le 6 ]; then # Extract MFCC features for train and test sets. mfccdir=mfcc for x in ${train_set} ${test_sets}; do @@ -122,13 +138,13 @@ if [ $stage -le 5 ]; then done fi -if [ $stage -le 6 ]; then +if [ $stage -le 7 ]; then # Starting basic training on MFCC features steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ data/${train_set} data/lang exp/mono fi -if [ $stage -le 7 ]; then +if [ $stage -le 8 ]; then steps/align_si.sh --nj $nj --cmd "$train_cmd" \ data/${train_set} data/lang exp/mono exp/mono_ali @@ -136,7 +152,7 @@ if [ $stage -le 7 ]; then 2500 30000 data/${train_set} data/lang exp/mono_ali exp/tri1 fi -if [ $stage -le 8 ]; then +if [ $stage -le 9 ]; then steps/align_si.sh --nj $nj --cmd "$train_cmd" \ data/${train_set} data/lang exp/tri1 exp/tri1_ali @@ -144,7 +160,7 @@ if [ $stage -le 8 ]; then 4000 50000 data/${train_set} data/lang exp/tri1_ali exp/tri2 fi -if [ $stage -le 9 ]; then +if [ $stage -le 10 ]; then utils/mkgraph.sh data/lang_test_$lm exp/tri2 exp/tri2/graph for dset in ${test_sets}; do steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ @@ -153,7 +169,7 @@ if [ $stage -le 9 ]; then wait fi -if [ $stage -le 10 ]; 
then +if [ $stage -le 11 ]; then steps/align_si.sh --nj $nj --cmd "$train_cmd" \ data/${train_set} data/lang exp/tri2 exp/tri2_ali @@ -161,7 +177,7 @@ if [ $stage -le 10 ]; then 5000 100000 data/${train_set} data/lang exp/tri2_ali exp/tri3 fi -if [ $stage -le 11 ]; then +if [ $stage -le 12 ]; then utils/mkgraph.sh data/lang_test_$lm exp/tri3 exp/tri3/graph for dset in ${test_sets}; do steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ @@ -170,13 +186,13 @@ if [ $stage -le 11 ]; then wait fi -if [ $stage -le 12 ]; then +if [ $stage -le 13 ]; then # chain TDNN local/chain/run_tdnn.sh --nj ${nj} --train-set ${train_set} --test-sets "$test_sets" --gmm tri3 --nnet3-affix _${train_set} \ --lm-suffix _test_$lm fi # get all WERs. -if [ $stage -le 13 ]; then +if [ $stage -le 14 ]; then local/get_results.sh fi From 3fb2981ce171565f4e850b5d6593c86c6784dcd3 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Wed, 7 Nov 2018 16:44:34 -0500 Subject: [PATCH 27/39] Minor modification in scoring script --- egs/reverb/s5/local/score.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh index e3dc606776b..66bc976333f 100755 --- a/egs/reverb/s5/local/score.sh +++ b/egs/reverb/s5/local/score.sh @@ -65,7 +65,7 @@ if echo $data | grep -q "real"; then near_room1 far_room1" elif echo $data | grep -q "cln"; then tasks="\ - cln_room" + cln_room1 cln_room2 cln_room3" else tasks="\ near_room1 far_room1 \ From f453337746235873f519cf264e0db61d965185c4 Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Wed, 7 Nov 2018 17:50:33 -0500 Subject: [PATCH 28/39] Added patch files --- egs/reverb/s5/local/score_RealData.patch | 14 ++++++++++++++ egs/reverb/s5/local/score_SimData.patch | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 egs/reverb/s5/local/score_RealData.patch create mode 100644 egs/reverb/s5/local/score_SimData.patch diff --git 
a/egs/reverb/s5/local/score_RealData.patch b/egs/reverb/s5/local/score_RealData.patch new file mode 100644 index 00000000000..cafa521d483 --- /dev/null +++ b/egs/reverb/s5/local/score_RealData.patch @@ -0,0 +1,14 @@ +11c11 +< clear all; +--- +> function score_RealData(download_from_ldc,senhroot) +26c26,27 +< srmrdir = 'SRMRtoolbox-ReverbChallenge'; +--- +> srmrdir = 'SRMRToolbox'; +> addpath(genpath('SRMRToolbox/libs')); +32d32 +< senhroot = '../output/RealData'; +129a130,131 +> +> end diff --git a/egs/reverb/s5/local/score_SimData.patch b/egs/reverb/s5/local/score_SimData.patch new file mode 100644 index 00000000000..ccfda90910b --- /dev/null +++ b/egs/reverb/s5/local/score_SimData.patch @@ -0,0 +1,19 @@ +11c11 +< clear all; +--- +> function score_SimData(download_from_ldc,senhroot,pesqdir) +26,27c26,27 +< srmrdir = 'SRMRtoolbox-ReverbChallenge'; +< % pesqdir = '/directory/where/pesq/executable/is/stored'; +--- +> srmrdir = 'SRMRToolbox'; +> addpath(genpath('SRMRToolbox/libs')); +36d35 +< senhroot = '../output/SimData'; +471c470,472 +< fclose(fid); +\ No newline at end of file +--- +> fclose(fid); +> +> end From 53b3259cd6580e8ca9dcd17d289ee33636fada2f Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Thu, 8 Nov 2018 15:10:27 -0500 Subject: [PATCH 29/39] Updated RESULTS according to Shinji's comments --- egs/reverb/s5/RESULTS | 238 +++++++++++++++++++++++++----------------- 1 file changed, 143 insertions(+), 95 deletions(-) diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS index bc994e05d48..c43ba3526a0 100644 --- a/egs/reverb/s5/RESULTS +++ b/egs/reverb/s5/RESULTS @@ -1,5 +1,9 @@ ######################################## GMM RESULTs: +######################################## + +No Front-End +######################################## exp/tri3/decode_dt_real_1ch %WER 34.59 [ 506 / 1463, 40 ins, 113 del, 353 sub ] exp/tri3/decode_dt_real_1ch/wer_15_0.5_far_room1 %WER 30.26 [ 485 / 1603, 42 ins, 112 del, 331 sub ] 
exp/tri3/decode_dt_real_1ch/wer_17_0.0_near_room1 @@ -24,6 +28,8 @@ exp/tri3/decode_et_simu_1ch %WER 9.46 [ 589 / 6226, 54 ins, 116 del, 419 sub ] exp/tri3/decode_et_simu_1ch/wer_15_1.0_near_room2 %WER 10.62 [ 623 / 5868, 81 ins, 105 del, 437 sub ] exp/tri3/decode_et_simu_1ch/wer_12_1.0_near_room3 +1ch - WPE +######################################## exp/tri3/decode_dt_real_1ch_wpe %WER 33.83 [ 495 / 1463, 42 ins, 97 del, 356 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_17_0.0_far_room1 %WER 27.76 [ 445 / 1603, 34 ins, 110 del, 301 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_16_0.0_near_room1 @@ -48,54 +54,8 @@ exp/tri3/decode_et_simu_1ch_wpe %WER 8.54 [ 532 / 6226, 55 ins, 96 del, 381 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_17_0.5_near_room2 %WER 10.07 [ 591 / 5868, 94 ins, 80 del, 417 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_12_0.5_near_room3 -exp/tri3/decode_dt_real_2ch_wpe -%WER 31.51 [ 461 / 1463, 39 ins, 87 del, 335 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_17_0.0_far_room1 -%WER 26.51 [ 425 / 1603, 31 ins, 105 del, 289 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_17_0.0_near_room1 - -exp/tri3/decode_dt_simu_2ch_wpe -%WER 6.24 [ 254 / 4071, 51 ins, 34 del, 169 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.0_far_room1 -%WER 14.32 [ 581 / 4058, 78 ins, 98 del, 405 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.0_far_room2 -%WER 17.33 [ 701 / 4045, 87 ins, 154 del, 460 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_14_1.0_far_room3 -%WER 5.75 [ 234 / 4071, 35 ins, 40 del, 159 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_17_1.0_near_room1 -%WER 7.07 [ 287 / 4058, 40 ins, 41 del, 206 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_1.0_near_room2 -%WER 7.54 [ 305 / 4045, 41 ins, 45 del, 219 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.5_near_room3 - -exp/tri3/decode_et_real_2ch_wpe -%WER 26.81 [ 794 / 2962, 113 ins, 116 del, 565 sub ] exp/tri3/decode_et_real_2ch_wpe/wer_16_0.0_far_room1 -%WER 28.11 [ 880 / 3131, 80 ins, 189 del, 611 sub ] 
exp/tri3/decode_et_real_2ch_wpe/wer_17_1.0_near_room1 - -exp/tri3/decode_et_simu_2ch_wpe -%WER 7.03 [ 415 / 5907, 73 ins, 45 del, 297 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_16_0.5_far_room1 -%WER 14.63 [ 911 / 6226, 93 ins, 161 del, 657 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_15_0.5_far_room2 -%WER 18.58 [ 1090 / 5868, 128 ins, 213 del, 749 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_14_0.5_far_room3 -%WER 7.06 [ 417 / 5907, 68 ins, 47 del, 302 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_17_1.0_near_room1 -%WER 8.75 [ 545 / 6226, 77 ins, 64 del, 404 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_13_0.5_near_room2 -%WER 9.08 [ 533 / 5868, 86 ins, 82 del, 365 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_12_1.0_near_room3 - -exp/tri3/decode_dt_real_8ch_wpe -%WER 27.41 [ 401 / 1463, 44 ins, 71 del, 286 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_15_1.0_far_room1 -%WER 23.96 [ 384 / 1603, 42 ins, 83 del, 259 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_15_1.0_near_room1 - -exp/tri3/decode_dt_simu_8ch_wpe -%WER 6.31 [ 257 / 4071, 43 ins, 46 del, 168 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_far_room1 -%WER 8.92 [ 362 / 4058, 63 ins, 56 del, 243 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_12_1.0_far_room2 -%WER 9.67 [ 391 / 4045, 131 ins, 42 del, 218 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_14_1.0_far_room3 -%WER 6.07 [ 247 / 4071, 39 ins, 39 del, 169 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_near_room1 -%WER 7.22 [ 293 / 4058, 60 ins, 32 del, 201 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_13_0.5_near_room2 -%WER 6.23 [ 252 / 4045, 54 ins, 25 del, 173 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_14_0.0_near_room3 - -exp/tri3/decode_et_real_8ch_wpe -%WER 22.01 [ 652 / 2962, 118 ins, 87 del, 447 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_17_0.5_far_room1 -%WER 23.95 [ 750 / 3131, 134 ins, 125 del, 491 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_16_1.0_near_room1 - -exp/tri3/decode_et_simu_8ch_wpe -%WER 7.16 [ 423 / 5907, 89 ins, 44 del, 290 sub ] 
exp/tri3/decode_et_simu_8ch_wpe/wer_15_0.5_far_room1 -%WER 9.73 [ 606 / 6226, 87 ins, 90 del, 429 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_14_1.0_far_room2 -%WER 9.22 [ 541 / 5868, 105 ins, 77 del, 359 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_15_1.0_far_room3 -%WER 7.08 [ 418 / 5907, 70 ins, 45 del, 303 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_17_1.0_near_room1 -%WER 7.98 [ 497 / 6226, 61 ins, 80 del, 356 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_15_1.0_near_room2 -%WER 7.91 [ 464 / 5868, 83 ins, 67 del, 314 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_16_1.0_near_room3 - +2ch - WPE+BeamformIt +######################################## exp/tri3/decode_dt_real_2ch_beamformit %WER 28.43 [ 416 / 1463, 32 ins, 82 del, 302 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_1.0_far_room1 %WER 23.46 [ 376 / 1603, 26 ins, 98 del, 252 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_1.0_near_room1 @@ -120,6 +80,8 @@ exp/tri3/decode_et_simu_2ch_beamformit %WER 7.73 [ 481 / 6226, 52 ins, 81 del, 348 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room2 %WER 8.64 [ 507 / 5868, 78 ins, 77 del, 352 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_13_1.0_near_room3 +8ch - WPE+BeamformIt +######################################## exp/tri3/decode_dt_real_8ch_beamformit %WER 20.71 [ 303 / 1463, 41 ins, 43 del, 219 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_14_1.0_far_room1 %WER 17.28 [ 277 / 1603, 24 ins, 57 del, 196 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_17_1.0_near_room1 @@ -144,107 +106,193 @@ exp/tri3/decode_et_simu_8ch_beamformit %WER 7.07 [ 440 / 6226, 78 ins, 66 del, 296 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room2 %WER 7.00 [ 411 / 5868, 83 ins, 59 del, 269 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_near_room3 -exp/tri3/decode_et_cln -%WER 6.74 [ 1213 / 18001, 234 ins, 158 del, 821 sub ] exp/tri3/decode_et_cln/wer_15_1.0_cln_room ######################################## TDNN RESULTs: 
+######################################## + exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* +######################################## + +No Front-End +######################################## %WER 20.44 [ 299 / 1463, 22 ins, 75 del, 202 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.5_far_room1 %WER 18.59 [ 298 / 1603, 16 ins, 79 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_13_0.0_near_room1 -%WER 17.91 [ 262 / 1463, 24 ins, 59 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_8_0.5_far_room1 -%WER 16.16 [ 259 / 1603, 16 ins, 64 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_11_0.0_near_room1 -%WER 16.13 [ 236 / 1463, 21 ins, 66 del, 149 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_10_1.0_far_room1 -%WER 11.92 [ 191 / 1603, 16 ins, 37 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_12_0.0_near_room1 -%WER 18.25 [ 267 / 1463, 21 ins, 70 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_9_0.5_far_room1 -%WER 14.60 [ 234 / 1603, 14 ins, 51 del, 169 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_10_0.0_near_room1 -%WER 12.24 [ 179 / 1463, 10 ins, 51 del, 118 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_1.0_far_room1 -%WER 9.61 [ 154 / 1603, 15 ins, 30 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_0.0_near_room1 -%WER 16.20 [ 237 / 1463, 19 ins, 80 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_11_0.5_far_room1 -%WER 12.98 [ 208 / 1603, 20 ins, 54 del, 134 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_10_0.0_near_room1 %WER 3.19 [ 130 / 4071, 16 ins, 27 del, 87 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room1 %WER 7.29 [ 296 / 4058, 35 ins, 51 del, 210 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 %WER 7.17 [ 290 / 4045, 31 ins, 57 del, 202 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.0_far_room3 %WER 3.00 [ 122 / 4071, 14 ins, 22 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.0_near_room1 %WER 3.43 [ 139 / 4058, 12 ins, 21 del, 106 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_1.0_near_room2 %WER 3.86 [ 156 / 4045, 19 ins, 28 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.0_near_room3 + +1ch - WPE +######################################## +%WER 17.91 [ 262 / 1463, 24 ins, 59 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_8_0.5_far_room1 +%WER 16.16 [ 259 / 1603, 16 ins, 64 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_11_0.0_near_room1 %WER 3.12 [ 127 / 4071, 20 ins, 19 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.0_far_room1 %WER 6.73 [ 273 / 4058, 33 ins, 46 del, 194 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.5_far_room2 %WER 6.50 [ 263 / 4045, 34 ins, 47 del, 182 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_9_0.0_far_room3 %WER 3.00 [ 122 / 4071, 15 ins, 13 del, 94 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.0_near_room1 %WER 3.25 [ 132 / 4058, 21 ins, 15 del, 96 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.0_near_room2 %WER 3.78 [ 153 / 4045, 23 ins, 24 del, 106 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.0_near_room3 + +2ch - WPE+BeamformIt +######################################## +%WER 16.13 [ 236 / 1463, 21 ins, 66 
del, 149 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_10_1.0_far_room1 +%WER 11.92 [ 191 / 1603, 16 ins, 37 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_12_0.0_near_room1 %WER 3.10 [ 126 / 4071, 22 ins, 18 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.0_far_room1 %WER 4.44 [ 180 / 4058, 16 ins, 36 del, 128 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_1.0_far_room2 %WER 4.70 [ 190 / 4045, 30 ins, 26 del, 134 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_0.0_far_room3 %WER 2.82 [ 115 / 4071, 12 ins, 18 del, 85 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.0_near_room1 %WER 2.88 [ 117 / 4058, 8 ins, 26 del, 83 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_1.0_near_room2 %WER 3.39 [ 137 / 4045, 25 ins, 20 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_7_0.0_near_room3 -%WER 3.00 [ 122 / 4071, 19 ins, 17 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_far_room1 -%WER 5.40 [ 219 / 4058, 26 ins, 38 del, 155 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_7_1.0_far_room2 -%WER 5.86 [ 237 / 4045, 22 ins, 44 del, 171 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_9_0.5_far_room3 -%WER 2.97 [ 121 / 4071, 13 ins, 16 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_near_room1 -%WER 3.40 [ 138 / 4058, 20 ins, 20 del, 98 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_near_room2 -%WER 3.76 [ 152 / 4045, 14 ins, 30 del, 108 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_1.0_near_room3 + +8ch - WPE+BeamformIt +######################################## 
+%WER 12.24 [ 179 / 1463, 10 ins, 51 del, 118 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_1.0_far_room1 +%WER 9.61 [ 154 / 1603, 15 ins, 30 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_0.0_near_room1 %WER 3.00 [ 122 / 4071, 13 ins, 22 del, 87 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.5_far_room1 %WER 3.03 [ 123 / 4058, 21 ins, 14 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.0_far_room2 %WER 2.94 [ 119 / 4045, 12 ins, 23 del, 84 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_1.0_far_room3 %WER 2.95 [ 120 / 4071, 15 ins, 14 del, 91 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.0_near_room1 %WER 2.64 [ 107 / 4058, 14 ins, 18 del, 75 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_0.0_near_room2 %WER 2.84 [ 115 / 4045, 13 ins, 27 del, 75 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_9_1.0_near_room3 -%WER 2.92 [ 119 / 4071, 14 ins, 21 del, 84 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.0_far_room1 -%WER 3.97 [ 161 / 4058, 14 ins, 38 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_1.0_far_room2 -%WER 3.44 [ 139 / 4045, 14 ins, 21 del, 104 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_0.5_far_room3 -%WER 2.92 [ 119 / 4071, 13 ins, 18 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.0_near_room1 -%WER 3.30 [ 134 / 4058, 13 ins, 29 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.5_near_room2 -%WER 3.36 [ 136 / 4045, 15 ins, 27 del, 94 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.5_near_room3 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* -%WER 3.55 [ 639 / 18001, 77 ins, 125 del, 437 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_cln/wer_9_1.0_cln_room +######################################## + +No Front-End +######################################## %WER 19.85 [ 588 / 2962, 52 ins, 114 del, 422 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_12_0.0_far_room1 %WER 18.24 [ 571 / 3131, 39 ins, 159 del, 373 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_1.0_near_room1 -%WER 18.10 [ 536 / 2962, 38 ins, 116 del, 382 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_11_0.5_far_room1 -%WER 15.81 [ 495 / 3131, 61 ins, 96 del, 338 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_10_0.0_near_room1 -%WER 14.21 [ 421 / 2962, 54 ins, 51 del, 316 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.0_far_room1 -%WER 11.27 [ 353 / 3131, 39 ins, 80 del, 234 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.5_near_room1 -%WER 15.94 [ 472 / 2962, 44 ins, 101 del, 327 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_11_0.5_far_room1 -%WER 14.53 [ 455 / 3131, 52 ins, 97 del, 306 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_10_0.5_near_room1 -%WER 10.23 [ 303 / 2962, 41 ins, 44 del, 218 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_0.5_far_room1 -%WER 9.17 [ 287 / 3131, 35 ins, 61 del, 191 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_1.0_near_room1 -%WER 12.90 [ 382 / 2962, 30 ins, 77 del, 275 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_10_1.0_far_room1 -%WER 11.75 [ 368 / 3131, 58 ins, 65 del, 245 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_10_0.0_near_room1 %WER 3.74 [ 221 / 
5907, 24 ins, 41 del, 156 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room1 %WER 7.66 [ 477 / 6226, 43 ins, 100 del, 334 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_0.5_far_room2 %WER 7.72 [ 453 / 5868, 33 ins, 114 del, 306 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 %WER 3.28 [ 194 / 5907, 20 ins, 38 del, 136 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room1 %WER 4.75 [ 296 / 6226, 27 ins, 66 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 %WER 4.91 [ 288 / 5868, 31 ins, 56 del, 201 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room3 + +1ch - WPE +######################################## +%WER 18.10 [ 536 / 2962, 38 ins, 116 del, 382 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_11_0.5_far_room1 +%WER 15.81 [ 495 / 3131, 61 ins, 96 del, 338 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_10_0.0_near_room1 %WER 3.69 [ 218 / 5907, 29 ins, 36 del, 153 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_0.5_far_room1 %WER 7.04 [ 438 / 6226, 48 ins, 77 del, 313 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_7_0.5_far_room2 %WER 7.17 [ 421 / 5868, 37 ins, 94 del, 290 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_1.0_far_room3 %WER 3.22 [ 190 / 5907, 27 ins, 31 del, 132 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_10_0.0_near_room1 %WER 4.72 [ 294 / 6226, 29 ins, 64 del, 201 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_9_1.0_near_room2 %WER 4.87 [ 286 / 5868, 33 ins, 50 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_7_1.0_near_room3 + +2ch - WPE+BeamformIt +######################################## 
+%WER 14.21 [ 421 / 2962, 54 ins, 51 del, 316 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.0_far_room1 +%WER 11.27 [ 353 / 3131, 39 ins, 80 del, 234 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.5_near_room1 %WER 3.28 [ 194 / 5907, 20 ins, 36 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room1 %WER 5.22 [ 325 / 6226, 36 ins, 72 del, 217 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room2 %WER 6.00 [ 352 / 5868, 39 ins, 70 del, 243 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room3 %WER 3.20 [ 189 / 5907, 28 ins, 29 del, 132 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_10_0.0_near_room1 %WER 4.18 [ 260 / 6226, 24 ins, 60 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_near_room2 %WER 4.26 [ 250 / 5868, 38 ins, 45 del, 167 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_8_0.5_near_room3 -%WER 3.39 [ 200 / 5907, 27 ins, 35 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_8_1.0_far_room1 -%WER 6.12 [ 381 / 6226, 28 ins, 94 del, 259 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_11_1.0_far_room2 -%WER 6.58 [ 386 / 5868, 39 ins, 77 del, 270 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_far_room3 -%WER 3.20 [ 189 / 5907, 29 ins, 30 del, 130 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_10_0.0_near_room1 -%WER 4.53 [ 282 / 6226, 29 ins, 61 del, 192 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_near_room2 -%WER 4.48 [ 263 / 5868, 26 ins, 49 del, 188 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_near_room3 + +8ch - 
WPE+BeamformIt +######################################## +%WER 10.23 [ 303 / 2962, 41 ins, 44 del, 218 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_0.5_far_room1 +%WER 9.17 [ 287 / 3131, 35 ins, 61 del, 191 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_1.0_near_room1 %WER 3.54 [ 209 / 5907, 27 ins, 36 del, 146 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_0.5_far_room1 %WER 4.11 [ 256 / 6226, 29 ins, 51 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_far_room2 %WER 3.89 [ 228 / 5868, 28 ins, 43 del, 157 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_far_room3 %WER 3.22 [ 190 / 5907, 24 ins, 29 del, 137 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_0.5_near_room1 %WER 3.71 [ 231 / 6226, 24 ins, 55 del, 152 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_1.0_near_room2 %WER 3.66 [ 215 / 5868, 22 ins, 46 del, 147 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_near_room3 -%WER 3.50 [ 207 / 5907, 19 ins, 42 del, 146 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_9_1.0_far_room1 -%WER 5.08 [ 316 / 6226, 34 ins, 59 del, 223 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_7_1.0_far_room2 -%WER 4.46 [ 262 / 5868, 33 ins, 48 del, 181 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_0.5_far_room3 -%WER 3.35 [ 198 / 5907, 16 ins, 41 del, 141 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_1.0_near_room1 -%WER 4.42 [ 275 / 6226, 27 ins, 56 del, 192 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_8_1.0_near_room2 -%WER 3.92 [ 230 / 5868, 37 ins, 36 del, 157 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_9_0.0_near_room3 +######################################## + +######################################## +SE Scores - 8ch - WPE+BeamformIt +######################################## + +Data type : SimData +######################################## + +============================================== + Cepstral distance in dB +---------------------------------------------- + mean median +---------------------------------------------- + org enh org enh +---------------------------------------------- + dt_far_room1 2.65 1.97 2.36 1.74 + dt_far_room2 5.08 4.66 4.94 4.30 + dt_far_room3 4.82 4.03 4.60 3.63 + dt_near_room1 1.96 1.67 1.67 1.37 + dt_near_room2 4.58 4.33 4.30 3.88 + dt_near_room3 4.20 3.71 3.91 3.26 +---------------------------------------------- + average 3.88 3.39 3.63 3.03 +============================================== + + +============================================== + SRMR (only mean used) +---------------------------------------------- + mean median +---------------------------------------------- + org enh org enh +---------------------------------------------- + dt_far_room1 4.63 4.91 - - + dt_far_room2 2.94 5.13 - - + dt_far_room3 2.76 4.87 - - + dt_near_room1 4.37 4.62 - - + dt_near_room2 3.67 4.39 - - + dt_near_room3 3.66 4.54 - - +---------------------------------------------- + average 3.67 4.74 - - +============================================== + + +============================================== + Log likelihood ratio +---------------------------------------------- + mean median +---------------------------------------------- + org enh org enh +---------------------------------------------- + dt_far_room1 0.38 0.33 0.35 0.30 + dt_far_room2 0.77 0.56 0.64 0.43 + dt_far_room3 0.85 0.52 0.77 0.45 + dt_near_room1 0.34 0.34 0.33 0.32 + dt_near_room2 0.51 0.50 0.43 0.33 + dt_near_room3 0.65 0.50 0.59 0.43 +---------------------------------------------- + average 0.58 0.46 0.52 0.38 
+============================================== + + +============================================== + Frequency-weighted segmental SNR in dB +---------------------------------------------- + mean median +---------------------------------------------- + org enh org enh +---------------------------------------------- + dt_far_room1 6.75 8.99 8.93 11.06 + dt_far_room2 0.53 3.84 0.37 5.91 + dt_far_room3 0.14 3.76 0.39 6.57 + dt_near_room1 8.10 9.50 10.47 11.32 + dt_near_room2 3.07 5.10 4.58 8.12 + dt_near_room3 2.32 4.54 4.41 8.15 +---------------------------------------------- + average 3.48 5.96 4.86 8.52 +============================================== + +Data type : RealData +######################################## + +============================== + SRMR +------------------------------ + org enh +------------------------------ + dt_far_room1 3.51 6.03 + dt_near_room1 4.05 6.68 +------------------------------ + average 3.78 6.36 +============================== From 10d4713d03b0761b808250f03934989826a7cbcd Mon Sep 17 00:00:00 2001 From: Aswin Shanmugam Subramanian Date: Wed, 14 Nov 2018 19:04:32 -0500 Subject: [PATCH 30/39] Enabled SE computation by default and added flag to enable PESQ --- egs/reverb/s5/local/compute_se_scores.sh | 12 ++++++++++-- egs/reverb/s5/local/score_SimData.patch | 6 +++++- egs/reverb/s5/run.sh | 9 +++++---- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/egs/reverb/s5/local/compute_se_scores.sh b/egs/reverb/s5/local/compute_se_scores.sh index d65fbbca2f4..8168c2c46a2 100755 --- a/egs/reverb/s5/local/compute_se_scores.sh +++ b/egs/reverb/s5/local/compute_se_scores.sh @@ -13,6 +13,7 @@ set -o pipefail cmd=run.pl nch=8 +enable_pesq=false . 
utils/parse_options.sh || exit 1; @@ -22,6 +23,7 @@ if [ $# != 3 ]; then echo "options" echo " --cmd # Command to run in parallel with" echo " --nch # nch of WPE to use for computing SE scores" + echo " --enable_pesq # Boolean flag to enable PESQ" exit 1; fi @@ -30,10 +32,16 @@ enhancement_directory=$2 pesqdir=$3 enhancement_directory_sim=$enhancement_directory/WPE/${nch}ch/REVERB_WSJCAM0_dt/data/ enhancement_directory_real=$enhancement_directory/WPE/${nch}ch/MC_WSJ_AV_Dev/ - expdir=${PWD}/exp/compute_se_${nch}ch +if $enable_pesq; then + compute_pesq=1 +else + compute_pesq=0 +fi + pushd local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/evaltools $cmd $expdir/compute_se_real.log matlab -nodisplay -nosplash -r "addpath('SRMRToolbox'); score_RealData('$reverb_data','$enhancement_directory_real');exit" -$cmd $expdir/compute_se_sim.log matlab -nodisplay -nosplash -r "addpath('SRMRToolbox'); score_SimData('$reverb_data','$enhancement_directory_sim','$pesqdir');exit" +$cmd $expdir/compute_se_sim.log matlab -nodisplay -nosplash -r "addpath('SRMRToolbox'); score_SimData('$reverb_data','$enhancement_directory_sim','$pesqdir',$compute_pesq);exit" popd +rm -rf $expdir/scores mv local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/scores $expdir/ diff --git a/egs/reverb/s5/local/score_SimData.patch b/egs/reverb/s5/local/score_SimData.patch index ccfda90910b..4fb0d9f48ac 100644 --- a/egs/reverb/s5/local/score_SimData.patch +++ b/egs/reverb/s5/local/score_SimData.patch @@ -1,7 +1,7 @@ 11c11 < clear all; --- -> function score_SimData(download_from_ldc,senhroot,pesqdir) +> function score_SimData(download_from_ldc,senhroot,pesqdir,compute_pesq) 26,27c26,27 < srmrdir = 'SRMRtoolbox-ReverbChallenge'; < % pesqdir = '/directory/where/pesq/executable/is/stored'; @@ -10,6 +10,10 @@ > addpath(genpath('SRMRToolbox/libs')); 36d35 < senhroot = '../output/SimData'; +39c38 +< if exist('pesqdir', 'var') +--- +> if exist('pesqdir', 'var') && compute_pesq~=0 471c470,472 < fclose(fid); \ No 
newline at end of file diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 89f77e3e01a..30fca3c6f72 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -38,8 +38,9 @@ fi stage=0 nch_se=8 # flag for turing on computation of dereverberation measures -# please make sure that you or your institution have the license to report PESQ before turning on the flag -compute_se=false +compute_se=true +# please make sure that you or your institution have the license to report PESQ before turning on the below flag +enable_pesq=false . utils/parse_options.sh # Set bash to 'debug' mode, it prints the commands (option '-x') and exits on : @@ -61,7 +62,7 @@ fi #training set and test set train_set=tr_simu_8ch -test_sets="dt_real_1ch dt_simu_1ch et_real_1ch et_simu_1ch dt_real_2ch_beamformit dt_simu_2ch_beamformit et_real_2ch_beamformit et_simu_2ch_beamformit dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe dt_real_2ch_wpe dt_simu_2ch_wpe et_real_2ch_wpe et_simu_2ch_wpe dt_real_8ch_wpe dt_simu_8ch_wpe et_real_8ch_wpe et_simu_8ch_wpe dt_cln et_cln" +test_sets="dt_real_8ch_beamformit dt_simu_8ch_beamformit et_real_8ch_beamformit et_simu_8ch_beamformit dt_real_1ch_wpe dt_simu_1ch_wpe et_real_1ch_wpe et_simu_1ch_wpe dt_cln et_cln" # The language models with which to decode (tg_5k or bg_5k) lm="tg_5k" @@ -92,7 +93,7 @@ if [ $stage -le 3 ] && $compute_se; then # download and install speech enhancement evaluation tools local/download_se_eval_tool.sh fi - local/compute_se_scores.sh --nch $nch_se $reverb $wavdir $pesqdir + local/compute_se_scores.sh --nch $nch_se --enable_pesq $enable_pesq $reverb $wavdir $pesqdir cat exp/compute_se_${nch_se}ch/scores/score_SimData cat exp/compute_se_${nch_se}ch/scores/score_RealData fi From 8d23156b0b75b6b70ff9418768d47949a53220f0 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Sat, 17 Nov 2018 21:44:48 -0500 Subject: [PATCH 
31/39] update RESULTS and fix error in compute_se_scores.sh --- egs/reverb/s5/RESULTS | 400 ++++++++++++----------- egs/reverb/s5/local/compute_se_scores.sh | 1 + egs/reverb/s5/run.sh | 14 +- 3 files changed, 209 insertions(+), 206 deletions(-) diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS index bc994e05d48..778de45d5b0 100644 --- a/egs/reverb/s5/RESULTS +++ b/egs/reverb/s5/RESULTS @@ -1,250 +1,262 @@ ######################################## GMM RESULTs: exp/tri3/decode_dt_real_1ch -%WER 34.59 [ 506 / 1463, 40 ins, 113 del, 353 sub ] exp/tri3/decode_dt_real_1ch/wer_15_0.5_far_room1 -%WER 30.26 [ 485 / 1603, 42 ins, 112 del, 331 sub ] exp/tri3/decode_dt_real_1ch/wer_17_0.0_near_room1 +%WER 34.18 [ 500 / 1463, 24 ins, 125 del, 351 sub ] exp/tri3/decode_dt_real_1ch/wer_17_1.0_far_room1 +%WER 29.63 [ 475 / 1603, 24 ins, 127 del, 324 sub ] exp/tri3/decode_dt_real_1ch/wer_15_0.5_near_room1 exp/tri3/decode_dt_simu_1ch -%WER 6.73 [ 274 / 4071, 42 ins, 40 del, 192 sub ] exp/tri3/decode_dt_simu_1ch/wer_15_0.0_far_room1 -%WER 18.38 [ 746 / 4058, 83 ins, 133 del, 530 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_0.5_far_room2 -%WER 19.70 [ 797 / 4045, 73 ins, 183 del, 541 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room3 -%WER 5.43 [ 221 / 4071, 34 ins, 38 del, 149 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_1.0_near_room1 -%WER 7.74 [ 314 / 4058, 54 ins, 47 del, 213 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_0.5_near_room2 -%WER 7.86 [ 318 / 4045, 38 ins, 53 del, 227 sub ] exp/tri3/decode_dt_simu_1ch/wer_16_0.0_near_room3 +%WER 6.85 [ 279 / 4071, 38 ins, 40 del, 201 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_far_room1 +%WER 18.31 [ 743 / 4058, 65 ins, 156 del, 522 sub ] exp/tri3/decode_dt_simu_1ch/wer_14_0.5_far_room2 +%WER 19.78 [ 800 / 4045, 76 ins, 147 del, 577 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_0.0_far_room3 +%WER 5.58 [ 227 / 4071, 33 ins, 34 del, 160 sub ] exp/tri3/decode_dt_simu_1ch/wer_13_1.0_near_room1 +%WER 7.49 [ 304 / 4058, 51 ins, 33 del, 220 sub 
] exp/tri3/decode_dt_simu_1ch/wer_12_0.0_near_room2 +%WER 7.96 [ 322 / 4045, 32 ins, 64 del, 226 sub ] exp/tri3/decode_dt_simu_1ch/wer_12_1.0_near_room3 exp/tri3/decode_et_real_1ch -%WER 32.82 [ 972 / 2962, 93 ins, 178 del, 701 sub ] exp/tri3/decode_et_real_1ch/wer_17_0.0_far_room1 -%WER 33.15 [ 1038 / 3131, 111 ins, 177 del, 750 sub ] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 +%WER 33.09 [ 980 / 2962, 103 ins, 157 del, 720 sub ] exp/tri3/decode_et_real_1ch/wer_13_0.0_far_room1 +%WER 33.18 [ 1039 / 3131, 104 ins, 194 del, 741 sub ] exp/tri3/decode_et_real_1ch/wer_16_0.0_near_room1 exp/tri3/decode_et_simu_1ch -%WER 7.55 [ 446 / 5907, 79 ins, 55 del, 312 sub ] exp/tri3/decode_et_simu_1ch/wer_15_0.5_far_room1 -%WER 18.36 [ 1143 / 6226, 109 ins, 209 del, 825 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.5_far_room2 -%WER 20.60 [ 1209 / 5868, 140 ins, 244 del, 825 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.0_far_room3 -%WER 6.97 [ 412 / 5907, 70 ins, 44 del, 298 sub ] exp/tri3/decode_et_simu_1ch/wer_16_1.0_near_room1 -%WER 9.46 [ 589 / 6226, 54 ins, 116 del, 419 sub ] exp/tri3/decode_et_simu_1ch/wer_15_1.0_near_room2 -%WER 10.62 [ 623 / 5868, 81 ins, 105 del, 437 sub ] exp/tri3/decode_et_simu_1ch/wer_12_1.0_near_room3 +%WER 7.43 [ 439 / 5907, 72 ins, 48 del, 319 sub ] exp/tri3/decode_et_simu_1ch/wer_16_0.5_far_room1 +%WER 18.34 [ 1142 / 6226, 120 ins, 208 del, 814 sub ] exp/tri3/decode_et_simu_1ch/wer_12_0.5_far_room2 +%WER 21.85 [ 1282 / 5868, 110 ins, 278 del, 894 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.5_far_room3 +%WER 7.35 [ 434 / 5907, 76 ins, 46 del, 312 sub ] exp/tri3/decode_et_simu_1ch/wer_17_1.0_near_room1 +%WER 9.35 [ 582 / 6226, 86 ins, 69 del, 427 sub ] exp/tri3/decode_et_simu_1ch/wer_14_0.0_near_room2 +%WER 10.24 [ 601 / 5868, 93 ins, 87 del, 421 sub ] exp/tri3/decode_et_simu_1ch/wer_13_0.0_near_room3 exp/tri3/decode_dt_real_1ch_wpe -%WER 33.83 [ 495 / 1463, 42 ins, 97 del, 356 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_17_0.0_far_room1 -%WER 27.76 
[ 445 / 1603, 34 ins, 110 del, 301 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_16_0.0_near_room1 +%WER 33.01 [ 483 / 1463, 41 ins, 85 del, 357 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_17_0.0_far_room1 +%WER 27.32 [ 438 / 1603, 31 ins, 98 del, 309 sub ] exp/tri3/decode_dt_real_1ch_wpe/wer_16_0.0_near_room1 exp/tri3/decode_dt_simu_1ch_wpe -%WER 6.61 [ 269 / 4071, 32 ins, 50 del, 187 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_1.0_far_room1 -%WER 17.32 [ 703 / 4058, 74 ins, 123 del, 506 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_16_0.0_far_room2 -%WER 18.34 [ 742 / 4045, 87 ins, 143 del, 512 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_0.0_far_room3 -%WER 5.50 [ 224 / 4071, 35 ins, 43 del, 146 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_16_1.0_near_room1 -%WER 7.20 [ 292 / 4058, 41 ins, 46 del, 205 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_12_1.0_near_room2 -%WER 7.69 [ 311 / 4045, 30 ins, 59 del, 222 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_1.0_near_room3 +%WER 6.53 [ 266 / 4071, 38 ins, 36 del, 192 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_13_1.0_far_room1 +%WER 17.62 [ 715 / 4058, 40 ins, 186 del, 489 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_1.0_far_room2 +%WER 19.04 [ 770 / 4045, 70 ins, 146 del, 554 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_15_0.0_far_room3 +%WER 5.50 [ 224 / 4071, 31 ins, 33 del, 160 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_14_1.0_near_room1 +%WER 7.76 [ 315 / 4058, 60 ins, 36 del, 219 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_11_0.5_near_room2 +%WER 7.89 [ 319 / 4045, 30 ins, 64 del, 225 sub ] exp/tri3/decode_dt_simu_1ch_wpe/wer_14_1.0_near_room3 exp/tri3/decode_et_real_1ch_wpe -%WER 30.25 [ 896 / 2962, 84 ins, 178 del, 634 sub ] exp/tri3/decode_et_real_1ch_wpe/wer_16_0.5_far_room1 -%WER 31.46 [ 985 / 3131, 111 ins, 157 del, 717 sub ] exp/tri3/decode_et_real_1ch_wpe/wer_16_0.0_near_room1 +%WER 30.08 [ 891 / 2962, 89 ins, 164 del, 638 sub ] exp/tri3/decode_et_real_1ch_wpe/wer_17_0.0_far_room1 +%WER 30.57 [ 957 / 3131, 105 ins, 162 del, 690 
sub ] exp/tri3/decode_et_real_1ch_wpe/wer_17_0.0_near_room1 exp/tri3/decode_et_simu_1ch_wpe -%WER 7.23 [ 427 / 5907, 71 ins, 51 del, 305 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_16_0.5_far_room1 -%WER 16.86 [ 1050 / 6226, 75 ins, 243 del, 732 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_14_1.0_far_room2 -%WER 19.90 [ 1168 / 5868, 134 ins, 250 del, 784 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_13_0.5_far_room3 -%WER 7.13 [ 421 / 5907, 76 ins, 39 del, 306 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_17_0.5_near_room1 -%WER 8.54 [ 532 / 6226, 55 ins, 96 del, 381 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_17_0.5_near_room2 -%WER 10.07 [ 591 / 5868, 94 ins, 80 del, 417 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_12_0.5_near_room3 +%WER 6.97 [ 412 / 5907, 71 ins, 52 del, 289 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_15_1.0_far_room1 +%WER 16.59 [ 1033 / 6226, 91 ins, 217 del, 725 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_13_1.0_far_room2 +%WER 20.60 [ 1209 / 5868, 92 ins, 285 del, 832 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_16_0.5_far_room3 +%WER 7.48 [ 442 / 5907, 93 ins, 41 del, 308 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_15_1.0_near_room1 +%WER 8.77 [ 546 / 6226, 76 ins, 59 del, 411 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_14_0.0_near_room2 +%WER 9.20 [ 540 / 5868, 63 ins, 113 del, 364 sub ] exp/tri3/decode_et_simu_1ch_wpe/wer_15_1.0_near_room3 exp/tri3/decode_dt_real_2ch_wpe -%WER 31.51 [ 461 / 1463, 39 ins, 87 del, 335 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_17_0.0_far_room1 -%WER 26.51 [ 425 / 1603, 31 ins, 105 del, 289 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_17_0.0_near_room1 +%WER 31.58 [ 462 / 1463, 57 ins, 67 del, 338 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_15_0.0_far_room1 +%WER 26.33 [ 422 / 1603, 24 ins, 115 del, 283 sub ] exp/tri3/decode_dt_real_2ch_wpe/wer_17_0.5_near_room1 exp/tri3/decode_dt_simu_2ch_wpe -%WER 6.24 [ 254 / 4071, 51 ins, 34 del, 169 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.0_far_room1 -%WER 14.32 [ 581 / 4058, 78 ins, 98 del, 405 
sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.0_far_room2 -%WER 17.33 [ 701 / 4045, 87 ins, 154 del, 460 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_14_1.0_far_room3 -%WER 5.75 [ 234 / 4071, 35 ins, 40 del, 159 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_17_1.0_near_room1 -%WER 7.07 [ 287 / 4058, 40 ins, 41 del, 206 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_1.0_near_room2 -%WER 7.54 [ 305 / 4045, 41 ins, 45 del, 219 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_13_0.5_near_room3 +%WER 6.19 [ 252 / 4071, 42 ins, 37 del, 173 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_15_0.5_far_room1 +%WER 13.82 [ 561 / 4058, 72 ins, 92 del, 397 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_11_0.5_far_room2 +%WER 18.12 [ 733 / 4045, 87 ins, 141 del, 505 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_15_0.5_far_room3 +%WER 5.90 [ 240 / 4071, 41 ins, 34 del, 165 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_16_1.0_near_room1 +%WER 7.15 [ 290 / 4058, 57 ins, 37 del, 196 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_17_0.0_near_room2 +%WER 7.91 [ 320 / 4045, 50 ins, 44 del, 226 sub ] exp/tri3/decode_dt_simu_2ch_wpe/wer_14_0.5_near_room3 exp/tri3/decode_et_real_2ch_wpe -%WER 26.81 [ 794 / 2962, 113 ins, 116 del, 565 sub ] exp/tri3/decode_et_real_2ch_wpe/wer_16_0.0_far_room1 -%WER 28.11 [ 880 / 3131, 80 ins, 189 del, 611 sub ] exp/tri3/decode_et_real_2ch_wpe/wer_17_1.0_near_room1 +%WER 26.27 [ 778 / 2962, 89 ins, 132 del, 557 sub ] exp/tri3/decode_et_real_2ch_wpe/wer_16_0.0_far_room1 +%WER 26.48 [ 829 / 3131, 76 ins, 179 del, 574 sub ] exp/tri3/decode_et_real_2ch_wpe/wer_17_1.0_near_room1 exp/tri3/decode_et_simu_2ch_wpe -%WER 7.03 [ 415 / 5907, 73 ins, 45 del, 297 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_16_0.5_far_room1 -%WER 14.63 [ 911 / 6226, 93 ins, 161 del, 657 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_15_0.5_far_room2 -%WER 18.58 [ 1090 / 5868, 128 ins, 213 del, 749 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_14_0.5_far_room3 -%WER 7.06 [ 417 / 5907, 68 ins, 47 del, 302 sub ] 
exp/tri3/decode_et_simu_2ch_wpe/wer_17_1.0_near_room1 -%WER 8.75 [ 545 / 6226, 77 ins, 64 del, 404 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_13_0.5_near_room2 -%WER 9.08 [ 533 / 5868, 86 ins, 82 del, 365 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_12_1.0_near_room3 +%WER 7.58 [ 448 / 5907, 89 ins, 46 del, 313 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_15_1.0_far_room1 +%WER 13.89 [ 865 / 6226, 87 ins, 159 del, 619 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_12_1.0_far_room2 +%WER 18.08 [ 1061 / 5868, 135 ins, 195 del, 731 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_12_0.5_far_room3 +%WER 7.65 [ 452 / 5907, 87 ins, 45 del, 320 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_16_1.0_near_room1 +%WER 8.59 [ 535 / 6226, 65 ins, 69 del, 401 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_16_0.5_near_room2 +%WER 8.49 [ 498 / 5868, 60 ins, 97 del, 341 sub ] exp/tri3/decode_et_simu_2ch_wpe/wer_15_1.0_near_room3 exp/tri3/decode_dt_real_8ch_wpe -%WER 27.41 [ 401 / 1463, 44 ins, 71 del, 286 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_15_1.0_far_room1 -%WER 23.96 [ 384 / 1603, 42 ins, 83 del, 259 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_15_1.0_near_room1 +%WER 27.89 [ 408 / 1463, 57 ins, 54 del, 297 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_15_0.5_far_room1 +%WER 22.83 [ 366 / 1603, 27 ins, 87 del, 252 sub ] exp/tri3/decode_dt_real_8ch_wpe/wer_16_1.0_near_room1 exp/tri3/decode_dt_simu_8ch_wpe -%WER 6.31 [ 257 / 4071, 43 ins, 46 del, 168 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_far_room1 -%WER 8.92 [ 362 / 4058, 63 ins, 56 del, 243 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_12_1.0_far_room2 -%WER 9.67 [ 391 / 4045, 131 ins, 42 del, 218 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_14_1.0_far_room3 -%WER 6.07 [ 247 / 4071, 39 ins, 39 del, 169 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_near_room1 -%WER 7.22 [ 293 / 4058, 60 ins, 32 del, 201 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_13_0.5_near_room2 -%WER 6.23 [ 252 / 4045, 54 ins, 25 del, 173 sub ] 
exp/tri3/decode_dt_simu_8ch_wpe/wer_14_0.0_near_room3 +%WER 5.87 [ 239 / 4071, 53 ins, 29 del, 157 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_13_0.5_far_room1 +%WER 8.50 [ 345 / 4058, 49 ins, 47 del, 249 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_13_1.0_far_room2 +%WER 9.17 [ 371 / 4045, 89 ins, 58 del, 224 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_15_1.0_far_room3 +%WER 6.31 [ 257 / 4071, 43 ins, 38 del, 176 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_near_room1 +%WER 6.58 [ 267 / 4058, 36 ins, 35 del, 196 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_16_1.0_near_room2 +%WER 6.70 [ 271 / 4045, 48 ins, 41 del, 182 sub ] exp/tri3/decode_dt_simu_8ch_wpe/wer_15_0.5_near_room3 exp/tri3/decode_et_real_8ch_wpe -%WER 22.01 [ 652 / 2962, 118 ins, 87 del, 447 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_17_0.5_far_room1 -%WER 23.95 [ 750 / 3131, 134 ins, 125 del, 491 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_16_1.0_near_room1 +%WER 20.53 [ 608 / 2962, 97 ins, 90 del, 421 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_16_0.5_far_room1 +%WER 21.78 [ 682 / 3131, 94 ins, 133 del, 455 sub ] exp/tri3/decode_et_real_8ch_wpe/wer_17_1.0_near_room1 exp/tri3/decode_et_simu_8ch_wpe -%WER 7.16 [ 423 / 5907, 89 ins, 44 del, 290 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_15_0.5_far_room1 -%WER 9.73 [ 606 / 6226, 87 ins, 90 del, 429 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_14_1.0_far_room2 -%WER 9.22 [ 541 / 5868, 105 ins, 77 del, 359 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_15_1.0_far_room3 -%WER 7.08 [ 418 / 5907, 70 ins, 45 del, 303 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_17_1.0_near_room1 -%WER 7.98 [ 497 / 6226, 61 ins, 80 del, 356 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_15_1.0_near_room2 -%WER 7.91 [ 464 / 5868, 83 ins, 67 del, 314 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_16_1.0_near_room3 +%WER 8.01 [ 473 / 5907, 113 ins, 51 del, 309 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_14_1.0_far_room1 +%WER 9.65 [ 601 / 6226, 86 ins, 87 del, 428 sub ] 
exp/tri3/decode_et_simu_8ch_wpe/wer_14_1.0_far_room2 +%WER 8.84 [ 519 / 5868, 105 ins, 73 del, 341 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_16_0.5_far_room3 +%WER 7.53 [ 445 / 5907, 84 ins, 49 del, 312 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_16_1.0_near_room1 +%WER 8.34 [ 519 / 6226, 62 ins, 77 del, 380 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_17_1.0_near_room2 +%WER 7.58 [ 445 / 5868, 68 ins, 72 del, 305 sub ] exp/tri3/decode_et_simu_8ch_wpe/wer_16_1.0_near_room3 exp/tri3/decode_dt_real_2ch_beamformit -%WER 28.43 [ 416 / 1463, 32 ins, 82 del, 302 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_1.0_far_room1 -%WER 23.46 [ 376 / 1603, 26 ins, 98 del, 252 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_1.0_near_room1 +%WER 29.67 [ 434 / 1463, 45 ins, 70 del, 319 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_17_0.5_far_room1 +%WER 24.08 [ 386 / 1603, 38 ins, 87 del, 261 sub ] exp/tri3/decode_dt_real_2ch_beamformit/wer_13_1.0_near_room1 exp/tri3/decode_dt_simu_2ch_beamformit -%WER 6.46 [ 263 / 4071, 57 ins, 36 del, 170 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_14_0.5_far_room1 -%WER 11.80 [ 479 / 4058, 59 ins, 87 del, 333 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_10_1.0_far_room2 -%WER 13.75 [ 556 / 4045, 97 ins, 90 del, 369 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_11_0.5_far_room3 -%WER 6.29 [ 256 / 4071, 49 ins, 41 del, 166 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_17_1.0_near_room1 -%WER 6.36 [ 258 / 4058, 42 ins, 33 del, 183 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_12_1.0_near_room2 -%WER 6.67 [ 270 / 4045, 42 ins, 45 del, 183 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_12_1.0_near_room3 +%WER 6.76 [ 275 / 4071, 60 ins, 43 del, 172 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_16_0.5_far_room1 +%WER 11.93 [ 484 / 4058, 68 ins, 67 del, 349 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_14_0.0_far_room2 +%WER 14.36 [ 581 / 4045, 77 ins, 105 del, 399 sub ] 
exp/tri3/decode_dt_simu_2ch_beamformit/wer_13_0.5_far_room3 +%WER 6.24 [ 254 / 4071, 41 ins, 40 del, 173 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_16_1.0_near_room1 +%WER 7.00 [ 284 / 4058, 54 ins, 33 del, 197 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_14_0.5_near_room2 +%WER 7.17 [ 290 / 4045, 44 ins, 50 del, 196 sub ] exp/tri3/decode_dt_simu_2ch_beamformit/wer_15_1.0_near_room3 exp/tri3/decode_et_real_2ch_beamformit -%WER 25.05 [ 742 / 2962, 75 ins, 159 del, 508 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_16_1.0_far_room1 -%WER 23.83 [ 746 / 3131, 87 ins, 146 del, 513 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_17_1.0_near_room1 +%WER 23.94 [ 709 / 2962, 92 ins, 108 del, 509 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_16_0.0_far_room1 +%WER 23.09 [ 723 / 3131, 78 ins, 144 del, 501 sub ] exp/tri3/decode_et_real_2ch_beamformit/wer_16_1.0_near_room1 exp/tri3/decode_et_simu_2ch_beamformit -%WER 6.97 [ 412 / 5907, 71 ins, 38 del, 303 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_16_0.5_far_room1 -%WER 12.50 [ 778 / 6226, 104 ins, 104 del, 570 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_12_0.0_far_room2 -%WER 15.59 [ 915 / 5868, 134 ins, 153 del, 628 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_13_0.0_far_room3 -%WER 7.36 [ 435 / 5907, 80 ins, 48 del, 307 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room1 -%WER 7.73 [ 481 / 6226, 52 ins, 81 del, 348 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room2 -%WER 8.64 [ 507 / 5868, 78 ins, 77 del, 352 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_13_1.0_near_room3 +%WER 7.18 [ 424 / 5907, 74 ins, 47 del, 303 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_15_1.0_far_room1 +%WER 12.14 [ 756 / 6226, 92 ins, 122 del, 542 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_11_1.0_far_room2 +%WER 15.20 [ 892 / 5868, 123 ins, 161 del, 608 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_14_0.0_far_room3 +%WER 7.62 [ 450 / 5907, 87 ins, 51 del, 312 sub ] 
exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room1 +%WER 7.53 [ 469 / 6226, 52 ins, 69 del, 348 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_17_1.0_near_room2 +%WER 8.08 [ 474 / 5868, 62 ins, 87 del, 325 sub ] exp/tri3/decode_et_simu_2ch_beamformit/wer_15_1.0_near_room3 exp/tri3/decode_dt_real_8ch_beamformit -%WER 20.71 [ 303 / 1463, 41 ins, 43 del, 219 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_14_1.0_far_room1 -%WER 17.28 [ 277 / 1603, 24 ins, 57 del, 196 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_17_1.0_near_room1 +%WER 20.92 [ 306 / 1463, 44 ins, 43 del, 219 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_13_1.0_far_room1 +%WER 17.53 [ 281 / 1603, 29 ins, 46 del, 206 sub ] exp/tri3/decode_dt_real_8ch_beamformit/wer_16_1.0_near_room1 exp/tri3/decode_dt_simu_8ch_beamformit -%WER 6.17 [ 251 / 4071, 41 ins, 43 del, 167 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_17_1.0_far_room1 -%WER 7.20 [ 292 / 4058, 58 ins, 44 del, 190 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_17_0.5_far_room2 -%WER 5.41 [ 219 / 4045, 43 ins, 27 del, 149 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_0.5_far_room3 -%WER 6.12 [ 249 / 4071, 51 ins, 37 del, 161 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_15_1.0_near_room1 -%WER 7.32 [ 297 / 4058, 105 ins, 27 del, 165 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_13_1.0_near_room2 -%WER 5.34 [ 216 / 4045, 48 ins, 31 del, 137 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_1.0_near_room3 +%WER 6.07 [ 247 / 4071, 39 ins, 40 del, 168 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_16_1.0_far_room1 +%WER 6.68 [ 271 / 4058, 45 ins, 44 del, 182 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_15_1.0_far_room2 +%WER 5.91 [ 239 / 4045, 35 ins, 39 del, 165 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_1.0_far_room3 +%WER 6.76 [ 275 / 4071, 56 ins, 39 del, 180 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_15_1.0_near_room1 +%WER 6.83 [ 277 / 4058, 81 ins, 31 del, 165 sub ] 
exp/tri3/decode_dt_simu_8ch_beamformit/wer_14_1.0_near_room2 +%WER 5.91 [ 239 / 4045, 43 ins, 36 del, 160 sub ] exp/tri3/decode_dt_simu_8ch_beamformit/wer_17_1.0_near_room3 exp/tri3/decode_et_real_8ch_beamformit -%WER 16.91 [ 501 / 2962, 93 ins, 71 del, 337 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_15_1.0_far_room1 -%WER 15.59 [ 488 / 3131, 86 ins, 74 del, 328 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_17_1.0_near_room1 +%WER 15.87 [ 470 / 2962, 66 ins, 81 del, 323 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_15_1.0_far_room1 +%WER 15.08 [ 472 / 3131, 81 ins, 69 del, 322 sub ] exp/tri3/decode_et_real_8ch_beamformit/wer_16_1.0_near_room1 exp/tri3/decode_et_simu_8ch_beamformit -%WER 6.57 [ 388 / 5907, 71 ins, 49 del, 268 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_15_1.0_far_room1 -%WER 7.60 [ 473 / 6226, 81 ins, 65 del, 327 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_15_0.5_far_room2 -%WER 7.26 [ 426 / 5868, 64 ins, 67 del, 295 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_far_room3 -%WER 7.11 [ 420 / 5907, 77 ins, 53 del, 290 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room1 -%WER 7.07 [ 440 / 6226, 78 ins, 66 del, 296 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room2 -%WER 7.00 [ 411 / 5868, 83 ins, 59 del, 269 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_near_room3 +%WER 7.03 [ 415 / 5907, 66 ins, 47 del, 302 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_15_1.0_far_room1 +%WER 7.31 [ 455 / 6226, 67 ins, 62 del, 326 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_16_0.5_far_room2 +%WER 7.29 [ 428 / 5868, 71 ins, 63 del, 294 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_14_1.0_far_room3 +%WER 7.43 [ 439 / 5907, 80 ins, 47 del, 312 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room1 +%WER 7.00 [ 436 / 6226, 75 ins, 64 del, 297 sub ] exp/tri3/decode_et_simu_8ch_beamformit/wer_17_1.0_near_room2 +%WER 6.99 [ 410 / 5868, 62 ins, 62 del, 286 sub ] 
exp/tri3/decode_et_simu_8ch_beamformit/wer_16_1.0_near_room3 + +exp/tri3/decode_dt_cln +%WER 5.33 [ 217 / 4071, 38 ins, 31 del, 148 sub ] exp/tri3/decode_dt_cln/wer_14_1.0_cln_room1 +%WER 5.72 [ 232 / 4058, 46 ins, 30 del, 156 sub ] exp/tri3/decode_dt_cln/wer_16_1.0_cln_room2 +%WER 5.76 [ 233 / 4045, 39 ins, 39 del, 155 sub ] exp/tri3/decode_dt_cln/wer_17_1.0_cln_room3 exp/tri3/decode_et_cln -%WER 6.74 [ 1213 / 18001, 234 ins, 158 del, 821 sub ] exp/tri3/decode_et_cln/wer_15_1.0_cln_room +%WER 6.35 [ 375 / 5907, 72 ins, 40 del, 263 sub ] exp/tri3/decode_et_cln/wer_14_1.0_cln_room1 +%WER 7.05 [ 439 / 6226, 81 ins, 64 del, 294 sub ] exp/tri3/decode_et_cln/wer_17_1.0_cln_room2 +%WER 6.73 [ 395 / 5868, 70 ins, 61 del, 264 sub ] exp/tri3/decode_et_cln/wer_17_1.0_cln_room3 ######################################## TDNN RESULTs: exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt* -%WER 20.44 [ 299 / 1463, 22 ins, 75 del, 202 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_10_0.5_far_room1 -%WER 18.59 [ 298 / 1603, 16 ins, 79 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_13_0.0_near_room1 -%WER 17.91 [ 262 / 1463, 24 ins, 59 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_8_0.5_far_room1 -%WER 16.16 [ 259 / 1603, 16 ins, 64 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_11_0.0_near_room1 -%WER 16.13 [ 236 / 1463, 21 ins, 66 del, 149 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_10_1.0_far_room1 -%WER 11.92 [ 191 / 1603, 16 ins, 37 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_12_0.0_near_room1 -%WER 18.25 [ 267 / 1463, 21 ins, 70 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_9_0.5_far_room1 -%WER 14.60 [ 234 / 1603, 14 ins, 51 del, 169 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_10_0.0_near_room1 -%WER 12.24 [ 179 / 1463, 10 ins, 51 del, 118 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_1.0_far_room1 -%WER 9.61 [ 154 / 1603, 15 ins, 30 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_11_0.0_near_room1 -%WER 16.20 [ 237 / 1463, 19 ins, 80 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_11_0.5_far_room1 -%WER 12.98 [ 208 / 1603, 20 ins, 54 del, 134 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_10_0.0_near_room1 -%WER 3.19 [ 130 / 4071, 16 ins, 27 del, 87 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room1 -%WER 7.29 [ 296 / 4058, 35 ins, 51 del, 210 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.5_far_room2 -%WER 7.17 [ 290 / 4045, 31 ins, 57 del, 202 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.0_far_room3 -%WER 3.00 [ 122 / 4071, 14 ins, 22 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.0_near_room1 -%WER 3.43 [ 139 / 4058, 12 ins, 21 del, 106 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_1.0_near_room2 -%WER 3.86 [ 156 / 4045, 19 ins, 28 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.0_near_room3 -%WER 3.12 [ 127 / 4071, 20 ins, 19 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.0_far_room1 -%WER 6.73 [ 273 / 4058, 33 ins, 46 del, 194 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.5_far_room2 -%WER 6.50 [ 263 / 4045, 34 ins, 47 del, 182 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_9_0.0_far_room3 -%WER 3.00 [ 122 / 4071, 15 ins, 13 del, 94 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.0_near_room1 -%WER 3.25 [ 132 / 4058, 21 ins, 15 del, 96 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.0_near_room2 -%WER 3.78 [ 153 / 4045, 23 ins, 24 del, 106 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.0_near_room3 -%WER 3.10 [ 126 / 4071, 22 ins, 18 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.0_far_room1 -%WER 4.44 [ 180 / 4058, 16 ins, 36 del, 128 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_1.0_far_room2 -%WER 4.70 [ 190 / 4045, 30 ins, 26 del, 134 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_0.0_far_room3 -%WER 2.82 [ 115 / 4071, 12 ins, 18 del, 85 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.0_near_room1 -%WER 2.88 [ 117 / 4058, 8 ins, 26 del, 83 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_1.0_near_room2 -%WER 3.39 [ 137 / 4045, 25 ins, 20 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_7_0.0_near_room3 -%WER 3.00 [ 122 / 4071, 19 ins, 17 del, 86 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_far_room1 -%WER 5.40 [ 219 / 4058, 26 ins, 38 del, 155 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_7_1.0_far_room2 -%WER 5.86 [ 237 / 4045, 22 ins, 44 del, 171 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_9_0.5_far_room3 -%WER 2.97 [ 121 / 4071, 13 ins, 16 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_near_room1 -%WER 3.40 [ 138 / 4058, 20 ins, 20 del, 98 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.0_near_room2 -%WER 3.76 [ 152 / 4045, 14 ins, 30 del, 108 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_1.0_near_room3 -%WER 3.00 [ 122 / 4071, 13 ins, 22 del, 87 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.5_far_room1 -%WER 3.03 [ 123 / 4058, 21 ins, 14 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.0_far_room2 -%WER 2.94 [ 119 / 4045, 12 ins, 23 del, 84 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_1.0_far_room3 -%WER 2.95 [ 120 / 4071, 15 ins, 14 del, 91 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.0_near_room1 -%WER 2.64 [ 107 / 4058, 14 ins, 18 del, 75 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_0.0_near_room2 -%WER 2.84 [ 115 / 4045, 13 ins, 27 del, 75 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_9_1.0_near_room3 -%WER 2.92 [ 119 / 4071, 14 ins, 21 del, 84 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.0_far_room1 -%WER 3.97 [ 161 / 4058, 14 ins, 38 del, 109 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_1.0_far_room2 -%WER 3.44 [ 139 / 4045, 14 ins, 21 del, 104 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_0.5_far_room3 -%WER 2.92 [ 119 / 4071, 13 ins, 18 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.0_near_room1 -%WER 3.30 [ 134 / 4058, 13 ins, 29 del, 92 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.5_near_room2 -%WER 3.36 [ 136 / 4045, 15 ins, 27 del, 94 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.5_near_room3 +%WER 2.60 [ 106 / 4071, 14 ins, 26 del, 66 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_cln/wer_7_1.0_cln_room1 +%WER 2.71 [ 110 / 4058, 18 ins, 15 del, 77 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_cln/wer_7_0.5_cln_room2 +%WER 2.79 [ 113 / 4045, 25 ins, 19 del, 69 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_cln/wer_8_0.5_cln_room3 +%WER 20.51 [ 300 / 1463, 20 ins, 80 del, 200 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_9_1.0_far_room1 +%WER 17.90 [ 287 / 1603, 13 ins, 85 del, 189 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch/wer_11_0.5_near_room1 +%WER 18.66 [ 273 / 1463, 17 ins, 72 del, 184 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_10_1.0_far_room1 +%WER 15.41 [ 247 / 1603, 17 ins, 68 del, 162 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_1ch_wpe/wer_12_0.0_near_room1 +%WER 14.90 [ 218 / 1463, 15 ins, 58 del, 145 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_10_1.0_far_room1 +%WER 12.23 [ 196 / 1603, 13 ins, 41 del, 142 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_beamformit/wer_12_0.0_near_room1 +%WER 17.50 [ 256 / 1463, 22 ins, 65 del, 169 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_9_1.0_far_room1 +%WER 14.29 [ 229 / 1603, 17 ins, 50 del, 162 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_2ch_wpe/wer_10_0.0_near_room1 +%WER 11.07 [ 162 / 1463, 17 ins, 38 del, 107 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_10_0.5_far_room1 +%WER 9.86 [ 158 / 1603, 12 ins, 46 del, 100 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_beamformit/wer_12_1.0_near_room1 +%WER 16.13 [ 236 / 1463, 18 ins, 50 del, 168 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_9_0.5_far_room1 +%WER 11.73 [ 188 / 1603, 12 ins, 54 del, 122 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_real_8ch_wpe/wer_10_0.5_near_room1 +%WER 3.24 [ 132 / 4071, 16 ins, 29 del, 87 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_1.0_far_room1 +%WER 7.20 [ 292 / 4058, 30 ins, 56 del, 206 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_far_room2 +%WER 6.67 [ 270 / 4045, 21 ins, 56 del, 193 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.5_far_room3 +%WER 2.85 [ 116 / 4071, 17 ins, 16 del, 83 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_7_0.0_near_room1 +%WER 3.52 [ 143 / 4058, 18 ins, 22 del, 103 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_8_0.5_near_room2 +%WER 4.23 [ 171 / 4045, 22 ins, 29 del, 120 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch/wer_9_0.0_near_room3 +%WER 3.14 [ 128 / 4071, 20 ins, 19 del, 89 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.0_far_room1 +%WER 6.73 [ 273 / 4058, 34 ins, 46 del, 193 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_0.5_far_room2 +%WER 6.33 [ 256 / 4045, 23 ins, 52 del, 181 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_10_0.5_far_room3 +%WER 2.60 [ 106 / 4071, 16 ins, 15 del, 75 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_7_0.0_near_room1 +%WER 3.18 [ 129 / 4058, 13 ins, 23 del, 93 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_8_1.0_near_room2 +%WER 3.98 [ 161 / 4045, 21 ins, 27 del, 113 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_1ch_wpe/wer_9_0.0_near_room3 +%WER 3.24 [ 132 / 4071, 24 ins, 18 del, 90 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_7_0.0_far_room1 +%WER 4.21 [ 171 / 4058, 17 ins, 33 del, 121 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_0.5_far_room2 +%WER 4.65 [ 188 / 4045, 20 ins, 33 del, 135 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_9_0.5_far_room3 +%WER 2.65 [ 108 / 4071, 11 ins, 23 del, 74 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.5_near_room1 +%WER 2.98 [ 121 / 4058, 7 ins, 26 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_1.0_near_room2 +%WER 3.44 [ 139 / 4045, 25 ins, 21 del, 93 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_beamformit/wer_8_0.0_near_room3 +%WER 3.10 [ 126 / 4071, 17 ins, 21 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_7_0.5_far_room1 +%WER 4.88 [ 198 / 4058, 15 ins, 41 del, 142 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_1.0_far_room2 +%WER 5.32 [ 215 / 4045, 23 ins, 39 del, 153 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_10_0.5_far_room3 +%WER 2.75 [ 112 / 4071, 14 ins, 17 del, 81 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_7_0.0_near_room1 +%WER 3.13 [ 127 / 4058, 13 ins, 24 del, 90 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_8_0.5_near_room2 +%WER 3.88 [ 157 / 4045, 16 ins, 33 del, 108 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_2ch_wpe/wer_11_1.0_near_room3 +%WER 3.05 [ 124 / 4071, 17 ins, 22 del, 85 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_0.0_far_room1 +%WER 3.01 [ 122 / 4058, 12 ins, 23 del, 87 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_10_0.5_far_room2 +%WER 3.19 [ 129 / 4045, 19 ins, 21 del, 89 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_1.0_far_room3 +%WER 2.65 [ 108 / 4071, 15 ins, 20 del, 73 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_7_0.5_near_room1 +%WER 2.51 [ 102 / 4058, 9 ins, 21 del, 72 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_1.0_near_room2 +%WER 2.79 [ 113 / 4045, 17 ins, 21 del, 75 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_beamformit/wer_8_0.5_near_room3 +%WER 3.12 [ 127 / 4071, 19 ins, 20 del, 88 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_7_0.5_far_room1 +%WER 3.82 [ 155 / 4058, 20 ins, 30 del, 105 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_0.5_far_room2 +%WER 3.46 [ 140 / 4045, 14 ins, 24 del, 102 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_1.0_far_room3 +%WER 2.82 [ 115 / 4071, 13 ins, 15 del, 87 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_0.0_near_room1 +%WER 3.10 [ 126 / 4058, 11 ins, 20 del, 95 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_8_0.5_near_room2 +%WER 3.56 [ 144 / 4045, 12 ins, 33 del, 99 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_dt_simu_8ch_wpe/wer_9_1.0_near_room3 exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et* -%WER 3.55 [ 639 / 18001, 77 ins, 125 del, 437 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_cln/wer_9_1.0_cln_room -%WER 19.85 [ 588 / 2962, 52 ins, 114 del, 422 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_12_0.0_far_room1 -%WER 18.24 [ 571 / 3131, 39 ins, 159 del, 373 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_10_1.0_near_room1 -%WER 18.10 [ 536 / 2962, 38 ins, 116 del, 382 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_11_0.5_far_room1 -%WER 15.81 [ 495 / 3131, 61 ins, 96 del, 338 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_10_0.0_near_room1 -%WER 14.21 [ 421 / 2962, 54 ins, 51 del, 316 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.0_far_room1 -%WER 11.27 [ 353 / 3131, 39 ins, 80 del, 234 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_0.5_near_room1 -%WER 15.94 [ 472 / 2962, 44 ins, 101 del, 327 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_11_0.5_far_room1 -%WER 14.53 [ 455 / 3131, 52 ins, 97 del, 306 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_10_0.5_near_room1 -%WER 10.23 [ 303 / 2962, 41 ins, 44 del, 218 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_0.5_far_room1 -%WER 9.17 [ 287 / 3131, 35 ins, 61 del, 191 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_10_1.0_near_room1 -%WER 12.90 [ 382 / 2962, 30 ins, 77 del, 275 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_10_1.0_far_room1 -%WER 11.75 [ 368 / 3131, 58 ins, 65 del, 245 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_10_0.0_near_room1 -%WER 3.74 [ 221 / 5907, 24 ins, 41 del, 156 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_far_room1 -%WER 7.66 [ 477 / 6226, 43 ins, 100 del, 334 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_0.5_far_room2 -%WER 7.72 [ 453 / 5868, 33 ins, 114 del, 306 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_far_room3 -%WER 3.28 [ 194 / 5907, 20 ins, 38 del, 136 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room1 -%WER 4.75 [ 296 / 6226, 27 ins, 66 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_1.0_near_room2 -%WER 4.91 [ 288 / 5868, 31 ins, 56 del, 201 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room3 -%WER 3.69 [ 218 / 5907, 29 ins, 36 del, 153 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_0.5_far_room1 -%WER 7.04 [ 438 / 6226, 48 ins, 77 del, 313 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_7_0.5_far_room2 -%WER 7.17 [ 421 / 5868, 37 ins, 94 del, 290 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_1.0_far_room3 -%WER 3.22 [ 190 / 5907, 27 ins, 31 del, 132 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_10_0.0_near_room1 -%WER 4.72 [ 294 / 6226, 29 ins, 64 del, 201 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_9_1.0_near_room2 -%WER 4.87 [ 286 / 5868, 33 ins, 50 del, 203 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_7_1.0_near_room3 -%WER 3.28 [ 194 / 5907, 20 ins, 36 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room1 -%WER 5.22 [ 325 / 6226, 36 ins, 72 del, 217 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room2 -%WER 6.00 [ 352 / 5868, 39 ins, 70 del, 243 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_far_room3 -%WER 3.20 [ 189 / 5907, 28 ins, 29 del, 132 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_10_0.0_near_room1 -%WER 4.18 [ 260 / 6226, 24 ins, 60 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_9_1.0_near_room2 -%WER 4.26 [ 250 / 5868, 38 ins, 45 del, 167 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_8_0.5_near_room3 -%WER 3.39 [ 200 / 5907, 27 ins, 35 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_8_1.0_far_room1 -%WER 6.12 [ 381 / 6226, 28 ins, 94 del, 259 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_11_1.0_far_room2 -%WER 6.58 [ 386 / 5868, 39 ins, 77 del, 270 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_far_room3 -%WER 3.20 [ 189 / 5907, 29 ins, 30 del, 130 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_10_0.0_near_room1 -%WER 4.53 [ 282 / 6226, 29 ins, 61 del, 192 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_near_room2 -%WER 4.48 [ 263 / 5868, 26 ins, 49 del, 188 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_near_room3 -%WER 3.54 [ 209 / 5907, 27 ins, 36 del, 146 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_0.5_far_room1 -%WER 4.11 [ 256 / 6226, 29 ins, 51 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_far_room2 -%WER 3.89 [ 228 / 5868, 28 ins, 43 del, 157 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_far_room3 -%WER 3.22 [ 190 / 5907, 24 ins, 29 del, 137 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_0.5_near_room1 -%WER 3.71 [ 231 / 6226, 24 ins, 55 del, 152 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_1.0_near_room2 -%WER 3.66 [ 215 / 5868, 22 ins, 46 del, 147 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_near_room3 -%WER 3.50 [ 207 / 5907, 19 ins, 42 del, 146 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_9_1.0_far_room1 -%WER 5.08 [ 316 / 6226, 34 ins, 59 del, 223 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_7_1.0_far_room2 -%WER 4.46 [ 262 / 5868, 33 ins, 48 del, 181 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_0.5_far_room3 -%WER 3.35 [ 198 / 5907, 16 ins, 41 del, 141 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_1.0_near_room1 -%WER 4.42 [ 275 / 6226, 27 ins, 56 del, 192 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_8_1.0_near_room2 -%WER 3.92 [ 230 / 5868, 37 ins, 36 del, 157 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_9_0.0_near_room3 +%WER 3.30 [ 195 / 5907, 33 ins, 34 del, 128 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_cln/wer_8_1.0_cln_room1 +%WER 3.71 [ 231 / 6226, 38 ins, 36 del, 157 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_cln/wer_7_1.0_cln_room2 +%WER 3.70 [ 217 / 5868, 32 ins, 42 del, 143 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_cln/wer_9_1.0_cln_room3 +%WER 20.90 [ 619 / 2962, 36 ins, 147 del, 436 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_11_1.0_far_room1 +%WER 18.65 [ 584 / 3131, 45 ins, 136 del, 403 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch/wer_11_0.5_near_room1 +%WER 17.69 [ 524 / 2962, 39 ins, 100 del, 385 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_13_0.0_far_room1 +%WER 16.00 [ 501 / 3131, 39 ins, 115 del, 347 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_1ch_wpe/wer_11_0.5_near_room1 +%WER 14.35 [ 425 / 2962, 32 ins, 90 del, 303 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_11_1.0_far_room1 +%WER 12.17 [ 381 / 3131, 44 ins, 76 del, 261 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_beamformit/wer_10_0.5_near_room1 +%WER 16.14 [ 478 / 2962, 45 ins, 91 del, 342 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_11_0.5_far_room1 +%WER 15.08 [ 472 / 3131, 48 ins, 94 del, 330 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_2ch_wpe/wer_10_0.5_near_room1 +%WER 11.01 [ 326 / 2962, 30 ins, 58 del, 238 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_11_1.0_far_room1 +%WER 9.49 [ 297 / 3131, 27 ins, 78 del, 192 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_beamformit/wer_12_1.0_near_room1 +%WER 13.20 [ 391 / 2962, 32 ins, 70 del, 289 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_9_1.0_far_room1 +%WER 12.17 [ 381 / 3131, 52 ins, 69 del, 260 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_real_8ch_wpe/wer_9_0.5_near_room1 +%WER 3.79 [ 224 / 5907, 20 ins, 49 del, 155 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_11_1.0_far_room1 +%WER 7.68 [ 478 / 6226, 60 ins, 94 del, 324 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_10_0.0_far_room2 +%WER 7.40 [ 434 / 5868, 46 ins, 93 del, 295 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_0.5_far_room3 +%WER 3.28 [ 194 / 5907, 36 ins, 29 del, 129 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_9_0.0_near_room1 +%WER 4.63 [ 288 / 6226, 33 ins, 57 del, 198 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_8_1.0_near_room2 +%WER 4.75 [ 279 / 5868, 26 ins, 60 del, 193 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch/wer_10_1.0_near_room3 +%WER 3.67 [ 217 / 5907, 31 ins, 34 del, 152 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_0.5_far_room1 +%WER 7.15 [ 445 / 6226, 39 ins, 91 del, 315 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_10_0.5_far_room2 +%WER 7.11 [ 417 / 5868, 39 ins, 100 del, 278 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_9_1.0_far_room3 +%WER 3.03 [ 179 / 5907, 37 ins, 24 del, 118 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_0.0_near_room1 +%WER 4.74 [ 295 / 6226, 34 ins, 57 del, 204 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_8_1.0_near_room2 +%WER 4.31 [ 253 / 5868, 27 ins, 51 del, 175 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_1ch_wpe/wer_9_1.0_near_room3 +%WER 3.23 [ 191 / 5907, 18 ins, 40 del, 133 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_10_1.0_far_room1 +%WER 5.35 [ 333 / 6226, 31 ins, 75 del, 227 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_10_1.0_far_room2 +%WER 5.81 [ 341 / 5868, 43 ins, 57 del, 241 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_10_0.5_far_room3 +%WER 3.15 [ 186 / 5907, 24 ins, 33 del, 129 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_8_1.0_near_room1 +%WER 4.42 [ 275 / 6226, 28 ins, 57 del, 190 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_8_1.0_near_room2 +%WER 4.12 [ 242 / 5868, 21 ins, 43 del, 178 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_beamformit/wer_10_1.0_near_room3 +%WER 3.34 [ 197 / 5907, 17 ins, 42 del, 138 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_10_1.0_far_room1 +%WER 6.22 [ 387 / 6226, 33 ins, 83 del, 271 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_far_room2 +%WER 6.34 [ 372 / 5868, 37 ins, 76 del, 259 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_1.0_far_room3 +%WER 3.17 [ 187 / 5907, 31 ins, 29 del, 127 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_8_0.5_near_room1 +%WER 4.63 [ 288 / 6226, 32 ins, 56 del, 200 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_8_1.0_near_room2 +%WER 4.36 [ 256 / 5868, 36 ins, 41 del, 179 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_2ch_wpe/wer_9_0.5_near_room3 +%WER 3.50 [ 207 / 5907, 29 ins, 33 del, 145 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_far_room1 +%WER 4.42 [ 275 / 6226, 32 ins, 61 del, 182 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_1.0_far_room2 +%WER 3.83 [ 225 / 5868, 34 ins, 37 del, 154 sub ] 
exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_0.5_far_room3 +%WER 3.15 [ 186 / 5907, 26 ins, 31 del, 129 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_8_1.0_near_room1 +%WER 4.00 [ 249 / 6226, 27 ins, 57 del, 165 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_9_1.0_near_room2 +%WER 3.54 [ 208 / 5868, 16 ins, 41 del, 151 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_beamformit/wer_10_1.0_near_room3 +%WER 3.61 [ 213 / 5907, 26 ins, 35 del, 152 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_8_1.0_far_room1 +%WER 5.41 [ 337 / 6226, 40 ins, 64 del, 233 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_7_1.0_far_room2 +%WER 4.38 [ 257 / 5868, 26 ins, 55 del, 176 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_1.0_far_room3 +%WER 3.06 [ 181 / 5907, 18 ins, 40 del, 123 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_10_1.0_near_room1 +%WER 4.45 [ 277 / 6226, 32 ins, 53 del, 192 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_8_1.0_near_room2 +%WER 3.75 [ 220 / 5868, 21 ins, 40 del, 159 sub ] exp/chain_tr_simu_8ch/tdnn1a_sp/decode_test_tg_5k_et_simu_8ch_wpe/wer_9_1.0_near_room3 diff --git a/egs/reverb/s5/local/compute_se_scores.sh b/egs/reverb/s5/local/compute_se_scores.sh index d65fbbca2f4..93458b8c8ac 100755 --- a/egs/reverb/s5/local/compute_se_scores.sh +++ b/egs/reverb/s5/local/compute_se_scores.sh @@ -36,4 +36,5 @@ pushd local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/evaltools $cmd $expdir/compute_se_real.log matlab -nodisplay -nosplash -r "addpath('SRMRToolbox'); score_RealData('$reverb_data','$enhancement_directory_real');exit" $cmd $expdir/compute_se_sim.log matlab -nodisplay -nosplash -r "addpath('SRMRToolbox'); score_SimData('$reverb_data','$enhancement_directory_sim','$pesqdir');exit" popd 
+rm -rf $expdir/scores mv local/REVERB_scores_source/REVERB-SPEENHA.Release04Oct/scores $expdir/ diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 89f77e3e01a..09577b1f84d 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -39,7 +39,7 @@ stage=0 nch_se=8 # flag for turing on computation of dereverberation measures # please make sure that you or your institution have the license to report PESQ before turning on the flag -compute_se=false +compute_se=true . utils/parse_options.sh # Set bash to 'debug' mode, it prints the commands (option '-x') and exits on : @@ -100,7 +100,7 @@ fi if [ $stage -le 4 ]; then # Prepare wsjcam0 clean data and wsj0 language model. local/wsjcam0_data_prep.sh $wsjcam0 $wsj0 - + # Prepare merged BEEP/CMU dictionary. local/wsj_prepare_beep_dict.sh @@ -109,16 +109,6 @@ if [ $stage -le 4 ]; then # Prepare directory structure for clean data. Apply some language model fixes. local/wsjcam0_format_data.sh - - local/train_lms_srilm.sh \ - --train-text data/${train_set}/text --dev-text data/dt_simu_8ch/text \ - --oov-symbol "" --words-file data/lang/words.txt \ - data/ data/srilm - - LM=data/srilm/best_3gram.gz - # Compiles G for reverb 3-gram LM - utils/format_lm.sh \ - data/lang $LM data/local/dict/lexicon.txt data/lang fi if [ $stage -le 5 ]; then From 5caf1ca8aa46f4908be9826630c30b7cc09e8241 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Sat, 17 Nov 2018 21:46:10 -0500 Subject: [PATCH 32/39] minor fix --- egs/reverb/s5/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index 09577b1f84d..ff70badd3ea 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -39,7 +39,7 @@ stage=0 nch_se=8 # flag for turing on computation of dereverberation measures # please make sure that you or your institution have the license to report PESQ before turning on the flag -compute_se=true +compute_se=false . 
utils/parse_options.sh # Set bash to 'debug' mode, it prints the commands (option '-x') and exits on : From 69659e624bb067e32c00f08f7aa99defa82f66e7 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Sat, 17 Nov 2018 22:19:39 -0500 Subject: [PATCH 33/39] remove some useless comment lines --- egs/reverb/s5/local/Generate_mcTrainData_cut.m | 2 -- 1 file changed, 2 deletions(-) diff --git a/egs/reverb/s5/local/Generate_mcTrainData_cut.m b/egs/reverb/s5/local/Generate_mcTrainData_cut.m index e6d7d95550a..831ff6a5226 100755 --- a/egs/reverb/s5/local/Generate_mcTrainData_cut.m +++ b/egs/reverb/s5/local/Generate_mcTrainData_cut.m @@ -87,7 +87,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir) save_dir_tr=[save_dir,'/data/mc_train/']; end mkdir([save_dir_tr]); -%mkdir([save_dir,'/taskfiles/']) mic_idx=['A';'B';'C';'D';'E';'F';'G';'H']; prev_fname='dummy'; @@ -137,7 +136,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir) for ch=1:8 outfilename = [save_dir_tr, fname, '_ch', num2str(ch), '.wav']; - %eval(['audiowrite(y(:,',num2str(ch),'),16000,''',save_dir_tr fname,'_ch',num2str(ch),'.wav'');']); eval(['audiowrite(outfilename, y(:,',num2str(ch),'), 16000);']); end From 135494b599ec9ff890940acb35c2f6dcfec6d598 Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Tue, 20 Nov 2018 11:35:09 -0500 Subject: [PATCH 34/39] 1) removed unnecessary files 2) Add the shebang header 3) Add option for dereverberation and beamforming --- egs/reverb/s5/local/run_wpe.py | 2 + egs/reverb/s5/local/train_lms_srilm.sh | 261 ------------------------- egs/reverb/s5/run.sh | 4 +- 3 files changed, 4 insertions(+), 263 deletions(-) delete mode 100755 egs/reverb/s5/local/train_lms_srilm.sh diff --git a/egs/reverb/s5/local/run_wpe.py b/egs/reverb/s5/local/run_wpe.py index 9c5e14c107e..cc9cd41927a 100644 --- a/egs/reverb/s5/local/run_wpe.py +++ b/egs/reverb/s5/local/run_wpe.py @@ -1,5 +1,7 @@ +#!/usr/bin/env python # Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam 
Subramanian) # Apache 2.0 +# Works with both python2 and python3 import numpy as np import soundfile as sf diff --git a/egs/reverb/s5/local/train_lms_srilm.sh b/egs/reverb/s5/local/train_lms_srilm.sh deleted file mode 100755 index 5a1d56d24b3..00000000000 --- a/egs/reverb/s5/local/train_lms_srilm.sh +++ /dev/null @@ -1,261 +0,0 @@ -#!/bin/bash -# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe) -# Apache 2.0 - -export LC_ALL=C - -# Begin configuration section. -words_file= -train_text= -dev_text= -oov_symbol="" -# End configuration section - -echo "$0 $@" - -[ -f path.sh ] && . ./path.sh -. ./utils/parse_options.sh || exit 1 - -echo "-------------------------------------" -echo "Building an SRILM language model " -echo "-------------------------------------" - -if [ $# -ne 2 ] ; then - echo "Incorrect number of parameters. " - echo "Script has to be called like this:" - echo " $0 [switches] " - echo "For example: " - echo " $0 data data/srilm" - echo "The allowed switches are: " - echo " words_file= word list file -- data/lang/words.txt by default" - echo " train_text= data/train/text is used in case when not specified" - echo " dev_text= last 10 % of the train text is used by default" - echo " oov_symbol=> symbol to use for oov modeling -- by default" - exit 1 -fi - -datadir=$1 -tgtdir=$2 - -##End of configuration -loc=`which ngram-count`; -if [ -z $loc ]; then - echo >&2 "You appear to not have SRILM tools installed, either on your path," - echo >&2 "Use the script \$KALDI_ROOT/tools/install_srilm.sh to install it." - exit 1 -fi - -# Prepare the destination directory -mkdir -p $tgtdir - -for f in $words_file $train_text $dev_text; do - [ ! -s $f ] && echo "No such file $f" && exit 1; -done - -[ -z $words_file ] && words_file=$datadir/lang/words.txt -if [ ! 
-z "$train_text" ] && [ -z "$dev_text" ] ; then - nr=`cat $train_text | wc -l` - nr_dev=$(($nr / 10 )) - nr_train=$(( $nr - $nr_dev )) - orig_train_text=$train_text - head -n $nr_train $train_text > $tgtdir/train_text - tail -n $nr_dev $train_text > $tgtdir/dev_text - - train_text=$tgtdir/train_text - dev_text=$tgtdir/dev_text - echo "Using words file: $words_file" - echo "Using train text: 9/10 of $orig_train_text" - echo "Using dev text : 1/10 of $orig_train_text" -elif [ ! -z "$train_text" ] && [ ! -z "$dev_text" ] ; then - echo "Using words file: $words_file" - echo "Using train text: $train_text" - echo "Using dev text : $dev_text" - train_text=$train_text - dev_text=$dev_text -else - train_text=$datadir/train/text - dev_text=$datadir/dev2h/text - echo "Using words file: $words_file" - echo "Using train text: $train_text" - echo "Using dev text : $dev_text" - -fi - -[ ! -f $words_file ] && echo >&2 "File $words_file must exist!" && exit 1 -[ ! -f $train_text ] && echo >&2 "File $train_text must exist!" && exit 1 -[ ! -f $dev_text ] && echo >&2 "File $dev_text must exist!" && exit 1 - - -# Extract the word list from the training dictionary; exclude special symbols -sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab -if (($?)); then - echo "Failed to create vocab from $words_file" - exit 1 -else - # wc vocab # doesn't work due to some encoding issues - echo vocab contains `cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}'` -fi - -# Kaldi transcript files contain Utterance_ID as the first word; remove it -# We also have to avoid skewing the LM by incorporating the same sentences -# from different channels -sed -e "s/\.CH.//" -e "s/_.\-./_/" -e "s/NOLOCATION\(\.[LR]\)*-//" -e "s/U[0-9][0-9]_//" $train_text | sort -u | \ - perl -ane 'print join(" ", @F[1..$#F]) . 
"\n" if @F > 1' > $tgtdir/train.txt -if (($?)); then - echo "Failed to create $tgtdir/train.txt from $train_text" - exit 1 -else - echo "Removed first word (uid) from every line of $train_text" - # wc text.train train.txt # doesn't work due to some encoding issues - echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` - echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` -fi - -# Kaldi transcript files contain Utterance_ID as the first word; remove it -sed -e "s/\.CH.//" -e "s/_.\-./_/" $dev_text | sort -u | \ - perl -ane 'print join(" ", @F[1..$#F]) . "\n" if @F > 1' > $tgtdir/dev.txt -if (($?)); then - echo "Failed to create $tgtdir/dev.txt from $dev_text" - exit 1 -else - echo "Removed first word (uid) from every line of $dev_text" - # wc text.train train.txt # doesn't work due to some encoding issues - echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` - echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` -fi - - -echo "-------------------" -echo "Good-Turing 3grams" -echo "-------------------" -ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" - -echo 
"-------------------" -echo "Kneser-Ney 3grams" -echo "-------------------" -ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 \ - -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 \ - -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 \ - -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 \ - -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.kn111.gz -kndiscount1 -gt1min 1 \ - -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.kn112.gz -kndiscount1 -gt1min 1 \ - -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.kn122.gz -kndiscount1 -gt1min 1 \ - -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/3gram.kn123.gz -kndiscount1 -gt1min 1 \ - -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -interpolate \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" - - -echo "-------------------" -echo "Good-Turing 4grams" -echo "-------------------" -ngram-count -lm $tgtdir/4gram.gt0111.gz \ - -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 
4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.gt0112.gz \ - -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.gt0122.gz \ - -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.gt0123.gz \ - -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.gt0113.gz \ - -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.gt0222.gz \ - -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.gt0223.gz \ - -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" - -echo "-------------------" -echo "Kneser-Ney 4grams" -echo "-------------------" -ngram-count -lm $tgtdir/4gram.kn0111.gz \ - -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.kn0112.gz \ - -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.kn0113.gz \ - -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.kn0122.gz \ - -kndiscount1 -gt1min 0 -kndiscount2 
-gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.kn0123.gz \ - -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.kn0222.gz \ - -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" -ngram-count -lm $tgtdir/4gram.kn0223.gz \ - -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 \ - -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" - -if [ ! -z ${LIBLBFGS} ]; then - #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault - #instead of that, we simply output the model in the maxent format and convert it using the "ngram" - echo "-------------------" - echo "Maxent 3grams" - echo "-------------------" - sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ - ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ - ngram -lm - -order 3 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ - sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1 - - echo "-------------------" - echo "Maxent 4grams" - echo "-------------------" - sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \ - ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\ - ngram -lm - -order 4 -unk -map-unk "$oov_symbol" -prune-lowprobs -write-lm - |\ - sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1 -else - echo >&2 "SRILM is not compiled with the support of MaxEnt models." 
- echo >&2 "You should use the script in \$KALDI_ROOT/tools/install_srilm.sh" - echo >&2 "which will take care of compiling the SRILM with MaxEnt support" - exit 1; -fi - - -echo "--------------------" -echo "Computing perplexity" -echo "--------------------" -( - for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done - for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -prune-lowprobs -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done -) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt - -echo "The perlexity scores report is stored in $tgtdir/perplexities.txt " -echo "" - -for best_ngram in {3,4}gram ; do - outlm=best_${best_ngram}.gz - lmfilename=$(grep "${best_ngram}" $tgtdir/perplexities.txt | head -n 1 | cut -f 1 -d ' ') - echo "$outlm -> $lmfilename" - (cd $tgtdir; rm -f $outlm; ln -sf $(basename $lmfilename) $outlm ) -done diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh index ea6114429a5..999ec98e637 100755 --- a/egs/reverb/s5/run.sh +++ b/egs/reverb/s5/run.sh @@ -83,8 +83,8 @@ if [ ${stage} -le 1 ]; then fi if [ $stage -le 2 ]; then - local/run_wpe.sh - local/run_beamform.sh ${wavdir}/WPE/ + local/run_wpe.sh --cmd "$train_cmd" + local/run_beamform.sh --cmd "$train_cmd" ${wavdir}/WPE/ fi # Compute dereverberation scores From 447cdea44f13da72b5c331b2b45c95c2631e292e Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Wed, 21 Nov 2018 19:48:41 -0500 Subject: [PATCH 35/39] delete unused config files --- egs/reverb/s5/conf/decode_dnn.config | 2 -- egs/reverb/s5/conf/fbank.conf | 2 -- 2 files changed, 4 deletions(-) delete mode 100644 egs/reverb/s5/conf/decode_dnn.config delete mode 100644 egs/reverb/s5/conf/fbank.conf diff --git a/egs/reverb/s5/conf/decode_dnn.config b/egs/reverb/s5/conf/decode_dnn.config deleted file mode 100644 index bfaae86702e..00000000000 --- a/egs/reverb/s5/conf/decode_dnn.config 
+++ /dev/null @@ -1,2 +0,0 @@ -beam=18.0 # beam for decoding. Was 13.0 in the scripts. -latbeam=10.0 # this has most effect on size of the lattices. diff --git a/egs/reverb/s5/conf/fbank.conf b/egs/reverb/s5/conf/fbank.conf deleted file mode 100644 index 82ac7bd0dbc..00000000000 --- a/egs/reverb/s5/conf/fbank.conf +++ /dev/null @@ -1,2 +0,0 @@ ---sample-frequency=16000 ---num-mel-bins=80 From 21bdf1e53b5fd36f9a75a814568fdb9734b9a776 Mon Sep 17 00:00:00 2001 From: Chen Szu-Jui Date: Wed, 28 Nov 2018 22:03:56 -0500 Subject: [PATCH 36/39] update reverb README.txt and some chime5 stuff --- egs/chime5/s5/cmd.sh | 2 +- .../s5/local/chain/tuning/run_tdnn_1a.sh | 11 -- .../s5/local/nnet3/run_ivector_common.sh | 2 +- egs/chime5/s5/local/score_for_submit.sh | 8 +- egs/chime5/s5/run.sh | 60 +++++++++- egs/reverb/s5/README.txt | 109 ++---------------- 6 files changed, 73 insertions(+), 119 deletions(-) diff --git a/egs/chime5/s5/cmd.sh b/egs/chime5/s5/cmd.sh index a697a22cda3..9702501f1a7 100644 --- a/egs/chime5/s5/cmd.sh +++ b/egs/chime5/s5/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="queue.pl --mem 2G" +export train_cmd="retry.pl queue.pl --mem 2G" export decode_cmd="queue.pl --mem 4G" diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index 45a7fd84bd6..daad37e2cd7 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -24,21 +24,16 @@ decode_iter= # training options # training chunk-options chunk_width=140,100,160 -# we don't need extra left/right context for TDNN systems. 
-chunk_left_context=0 -chunk_right_context=0 common_egs_dir= xent_regularize=0.1 # training options srand=0 remove_egs=true -reporting_email= #decode options test_online_decoding=false # if true, it will run the last decoding stage. - # End configuration section. echo "$0 $@" # Print the command line for logging @@ -176,7 +171,6 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi - if [ $stage -le 14 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ @@ -212,7 +206,6 @@ if [ $stage -le 14 ]; then --egs.opts="--frames-overlap-per-eg 0" \ --cleanup.remove-egs=$remove_egs \ --use-gpu=true \ - --reporting.email="$reporting_email" \ --feat-dir=$train_data_dir \ --tree-dir=$tree_dir \ --lat-dir=$lat_dir \ @@ -235,10 +228,6 @@ if [ $stage -le 16 ]; then ( steps/nnet3/decode.sh \ --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj 8 --cmd "$decode_cmd" --num-threads 4 \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ diff --git a/egs/chime5/s5/local/nnet3/run_ivector_common.sh b/egs/chime5/s5/local/nnet3/run_ivector_common.sh index e28e5ce996d..5be853cf679 100755 --- a/egs/chime5/s5/local/nnet3/run_ivector_common.sh +++ b/egs/chime5/s5/local/nnet3/run_ivector_common.sh @@ -23,7 +23,7 @@ nnet3_affix=_train_worn_u100k gmm_dir=exp/${gmm} ali_dir=exp/${gmm}_ali_${train_set}_sp -for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do +for f in ${gmm_dir}/final.mdl; do if [ ! 
-f $f ]; then echo "$0: expected file $f to exist" exit 1 diff --git a/egs/chime5/s5/local/score_for_submit.sh b/egs/chime5/s5/local/score_for_submit.sh index 5502c5994e5..23121d68b93 100755 --- a/egs/chime5/s5/local/score_for_submit.sh +++ b/egs/chime5/s5/local/score_for_submit.sh @@ -43,7 +43,7 @@ for session in S02 S09; do # get nerror nerr=`grep "\#csid" $score_result | grep $room | grep $session | awk '{sum+=$4+$5+$6} END {print sum}'` # get nwords from references (NF-2 means to exclude utterance id and " ref ") - nwrd=`grep " ref " $score_result | grep $room | grep $session | sed -e "s/\*//g" | awk '{sum+=NF-2} END {print sum}'` + nwrd=`grep "\#csid" $score_result | grep $room | grep $session | awk '{sum+=$3+$4+$6} END {print sum}'` # compute wer with scale=2 wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` @@ -59,7 +59,7 @@ echo -n "overall: " # get nerror nerr=`grep "\#csid" $score_result | awk '{sum+=$4+$5+$6} END {print sum}'` # get nwords from references (NF-2 means to exclude utterance id and " ref ") -nwrd=`grep " ref " $score_result | sed -e "s/\*//g" | awk '{sum+=NF-2} END {print sum}'` +nwrd=`grep "\#csid" $score_result | awk '{sum+=$3+$4+$6} END {print sum}'` # compute wer with scale=2 wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` echo -n "#words $nwrd, " @@ -81,7 +81,7 @@ for session in S01 S21; do # get nerror nerr=`grep "\#csid" $score_result | grep $room | grep $session | awk '{sum+=$4+$5+$6} END {print sum}'` # get nwords from references (NF-2 means to exclude utterance id and " ref ") - nwrd=`grep " ref " $score_result | grep $room | grep $session | sed -e "s/\*//g" | awk '{sum+=NF-2} END {print sum}'` + nwrd=`grep "\#csid" $score_result | grep $room | grep $session | awk '{sum+=$3+$4+$6} END {print sum}'` # compute wer with scale=2 wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` @@ -98,7 +98,7 @@ if $do_eval; then # get nerror nerr=`grep "\#csid" $score_result | awk '{sum+=$4+$5+$6} END {print sum}'` # get nwords from references (NF-2 means to 
exclude utterance id and " ref ") - nwrd=`grep " ref " $score_result | sed -e "s/\*//g" | awk '{sum+=NF-2} END {print sum}'` + nwrd=`grep "\#csid" $score_result | awk '{sum+=$3+$4+$6} END {print sum}'` # compute wer with scale=2 wer=`echo "scale=2; 100 * $nerr / $nwrd" | bc` echo -n "overall: " diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index 024c0190b3e..cc2cc3e558b 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -28,7 +28,7 @@ json_dir=${chime5_corpus}/transcriptions audio_dir=${chime5_corpus}/audio # training and test data -train_set=train_worn_u100k +train_set=train_worn_u400k test_sets="dev_worn dev_${enhancement}_ref eval_${enhancement}_ref" # This script also needs the phonetisaurus g2p, srilm, beamformit @@ -99,8 +99,8 @@ if [ $stage -le 5 ]; then # randomly extract first 100k utterances from all mics # if you want to include more training data, you can increase the number of array mic utterances utils/combine_data.sh data/train_uall data/train_u01 data/train_u02 data/train_u04 data/train_u05 data/train_u06 - utils/subset_data_dir.sh data/train_uall 100000 data/train_u100k - utils/combine_data.sh data/${train_set} data/train_worn data/train_u100k + utils/subset_data_dir.sh data/train_uall 400000 data/train_u400k + utils/combine_data.sh data/${train_set} data/train_worn data/train_u400k # only use left channel for worn mic recognition # you can use both left and right channels for training @@ -191,6 +191,32 @@ if [ $stage -le 12 ]; then wait fi +#if [ $stage -le 13 ]; then +# steps/get_prons.sh --cmd "$train_cmd" data/train data/lang_nosp exp/tri2 +# utils/dict_dir_add_pronprobs.sh --max-normalize true \ +# data/local/dict_nosp exp/tri2/pron_counts_nowb.txt \ +# exp/tri2/sil_counts_nowb.txt \ +# exp/tri2/pron_bigram_counts_nowb.txt data/local/dict +#fi +# +#if [ $stage -le 14 ]; then +# utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang +# cp -rT data/lang data/lang_rescore +# cp data/lang_nosp/G.fst data/lang/ 
+# cp data/lang_nosp_rescore/G.carpa data/lang_rescore/ +# +# utils/mkgraph.sh data/lang exp/tri2 exp/tri2/graph +# +# for dset in dev test; do +# steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ +# exp/tri2/graph data/${dset} exp/tri2/decode_${dset} +# steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ +# data/${dset} exp/tri2/decode_${dset} exp/tri2/decode_${dset}_rescore +# done +#fi + + + if [ $stage -le 14 ]; then steps/align_si.sh --nj $nj --cmd "$train_cmd" \ data/${train_set} data/lang exp/tri2 exp/tri2_ali @@ -216,11 +242,35 @@ if [ $stage -le 16 ]; then fi if [ $stage -le 17 ]; then - # chain TDNN - local/chain/run_tdnn.sh --nj ${nj} --train-set ${train_set}_cleaned --test-sets "$test_sets" --gmm tri3_cleaned --nnet3-affix _${train_set}_cleaned + rm -r data/train_worn_cleaned 2>/dev/null || true + utils/copy_data_dir.sh data/${train_set}_clean data/train_worn_cleaned + + awk '{print $1}' data/train_worn/wav.scp > data/train_worn_cleaned/recos.tmp + utils/filter_scp.pl data/train_worn_cleaned/recos.tmp \ + data/${train_set}_cleaned/wav.scp > data/train_worn_cleaned/wav.scp + + utils/fix_data_dir.sh data/train_worn_cleaned + + rm -r data/train_u400k_cleaned 2>/dev/null || true + utils/copy_data_dir.sh data/${train_set}_clean data/train_u400k_cleaned + + utils/filter_scp.pl --exclude data/train_worn_cleaned/recos.tmp \ + data/${train_set}_cleaned/wav.scp > data/train_u400k_cleaned/wav.scp + + utils/fix_data_dir.sh data/train_u400k_cleaned fi if [ $stage -le 18 ]; then + # chain TDNN + local/chain/multi_condition/run_tdnn.sh --nj ${nj} \ + --train-set-clean train_worn_cleaned \ + --train-set-noisy train_u400k_cleaned \ + --combined-train-set ${train_set}_cleaned \ + --test-sets "$test_sets" \ + --gmm tri3_cleaned --nnet3-affix _${train_set}_cleaned_rvb +fi + +if [ $stage -le 19 ]; then # final scoring to get the official challenge result # please specify both dev and eval set directories so that the search 
parameters # (insertion penalty and language model weight) will be tuned using the dev set diff --git a/egs/reverb/s5/README.txt b/egs/reverb/s5/README.txt index 1daa214edb6..295b3da0582 100644 --- a/egs/reverb/s5/README.txt +++ b/egs/reverb/s5/README.txt @@ -1,6 +1,9 @@ -Improved multi condition training baseline for REVERB challenge based on Kaldi +Improved baseline for REVERB challenge based on Kaldi ============================================================================== +updated +Wed Nov 28 11:36:30 EST 2018 Szu-Jui Chen + updated Wed Apr 29 19:10:33 EDT 2015 Shinji Watanabe @@ -11,24 +14,13 @@ original: Wed Nov 6 14:47:59 EST 2013 Felix Weninger Key specs: -- MFCC-LDA-STC front-end -- Boosted MMI trained GMM-HMM +- MFCC-LDA-STC front-end(not sure) +- TDNN acoustic model - Utterance-based adaptation using basis fMLLR -- Tri-gram LM minimum Bayes risk decoding - -WER [%] -@ Language model weight = 15 -Avg(SimData_(far|near)) = 11.73 -Avg(RealData) = 30.44 -@ Language model weight = 16 (optimal) -Avg(SimData_(far|near)) = 11.72 -Avg(RealData) = 30.28 - -See RESULTS in more detail - -Kaldi SVN rev. 5035, 4/26/15 -tested on Ubuntu 13.04 +- Tri-gram LM minimum Bayes risk decoding(not sure) +RESULT: +For experiment results, please see RESULTS for more detail REFERENCE: ++++++++ @@ -43,88 +35,11 @@ Enhancement", Proc. REVERB Workshop, IEEE, Florence, Italy, May 2014. INSTRUCTIONS: +++++++++++++ - -1) Set the path names in corpus.sh.default, - and copy this file to "corpus.sh" - ------ -2) [optional:] If you have speech enhancement (processed waveforms), then - -3a) Change directories and data preparation steps - For example, you could have something like - - local/REVERB_wsjcam0_data_prep.sh /path/to/processed/REVERB_WSJCAM0_dt REVERB_dt_derev dt - - The first argument is supposed to point to a folder that has the same - structure as the REVERB corpus. 
- -3b) run the multi-condition training steps in run.sh with the processed - training set, e.g., REVERB_tr_cut_derev, if you want to investigate - recognizer re-training - - - Any system that has _mc in its name uses multi-condition training - - You probably want to change the system names if you are using enhanced - data for training (e.g. tri2b_mc -> tri2b_mc_derev) - -3c) Add your re-trained recognizer to the list of recognizers that are - discriminatively re-trained - -3d) Modify the decoding steps in run.sh so that they use enhanced data and add - your re-trained recognizer(s) to the list ------ - -4) Execute the training and recognition steps by +1) Execute the training and recognition steps by ./run.sh Depending on your system specs (# of CPUs, RAM) you might want (or have) to - change the number of parallel jobs -- this is controlled by the nj_train, - nj_bg, and nj_tg variables (# of jobs for training, for bi-gram and tri-gram - decoding). - - If you also want to have the re-implementation of the HTK baseline in Kaldi - (tri2a and tri2a_mc systems), set the do_tri2a variable to true in run.sh. - -5) Execute - - ./local/get_results.sh - - to display the results corresponding to Table 1 in - the following paper, - - Felix Weninger, Shinji Watanabe, Jonathan Le Roux, John R. Hershey, Yuuki - Tachioka, Jürgen Geiger, Björn Schuller, Gerhard Rigoll: "The MERL/MELCO/TUM - system for the REVERB Challenge using Deep Recurrent Neural Network Feature - Enhancement", to appear in Proc. REVERB Workshop, IEEE, Florence, Italy, 2014. - - NOTE: It is very common to have slightly different results (up to +/- 1% - absolute WER per REVERB task file) on different machines. The reason for - this is not fully known. - - NOTE 2: By default, only the LDA-STC systems are trained - set do_tri2a in - run.sh to true to also train the Delta+Delta-Delta systems (cf. above). 
- ------ -6) You can get more recognition results (for other combinations of front-ends, - adaptation, language model, etc.), by - - $> local/summarize_results.pl [options] [ [ Date: Wed, 9 Jan 2019 23:03:31 -0500 Subject: [PATCH 37/39] add nara_wpe basic version (no batching) --- egs/chime5/s5/cmd.sh | 2 +- .../s5/local/chain/tuning/run_tdnn_1a.sh | 4 - egs/chime5/s5/local/run_wpe.py | 54 +++++++ egs/chime5/s5/local/run_wpe.sh | 152 ++++++++++++++++++ egs/chime5/s5/run.sh | 25 ++- 5 files changed, 225 insertions(+), 12 deletions(-) create mode 100644 egs/chime5/s5/local/run_wpe.py create mode 100755 egs/chime5/s5/local/run_wpe.sh diff --git a/egs/chime5/s5/cmd.sh b/egs/chime5/s5/cmd.sh index 9702501f1a7..dfa2fab1688 100644 --- a/egs/chime5/s5/cmd.sh +++ b/egs/chime5/s5/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="retry.pl queue.pl --mem 2G" +export train_cmd="retry.pl queue.pl --mem 100G" export decode_cmd="queue.pl --mem 4G" diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index daad37e2cd7..6066eb310f5 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -198,10 +198,6 @@ if [ $stage -le 14 ]; then --trainer.num-chunk-per-minibatch=256,128,64 \ --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0" \ --cleanup.remove-egs=$remove_egs \ diff --git a/egs/chime5/s5/local/run_wpe.py b/egs/chime5/s5/local/run_wpe.py new file mode 100644 index 00000000000..cc9cd41927a --- /dev/null +++ b/egs/chime5/s5/local/run_wpe.py @@ -0,0 +1,54 
@@ +#!/usr/bin/env python +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# Apache 2.0 +# Works with both python2 and python3 + +import numpy as np +import soundfile as sf +import time +import os, errno +from tqdm import tqdm +import argparse + +from nara_wpe.wpe import wpe +from nara_wpe.utils import stft, istft +from nara_wpe import project_root + +parser = argparse.ArgumentParser() +parser.add_argument('--files', '-f', nargs='+') +args = parser.parse_args() + +input_files = args.files[:len(args.files)//2] +output_files = args.files[len(args.files)//2:] +out_dir = os.path.dirname(output_files[0]) +try: + os.makedirs(out_dir) +except OSError as e: + if e.errno != errno.EEXIST: + raise + +stft_options = dict( + size=512, + shift=128, + window_length=None, + fading=True, + pad=True, + symmetric_window=False +) + +sampling_rate = 16000 +delay = 3 +iterations = 5 +taps = 10 + +signal_list = [ + sf.read(f)[0] + for f in input_files +] +y = np.stack(signal_list, axis=0) +Y = stft(y, **stft_options).transpose(2, 0, 1) +Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0) +z = istft(Z, size=stft_options['size'], shift=stft_options['shift']) + +for d in range(len(signal_list)): + sf.write(output_files[d], z[d,:], sampling_rate) diff --git a/egs/chime5/s5/local/run_wpe.sh b/egs/chime5/s5/local/run_wpe.sh new file mode 100755 index 00000000000..f8419ddf6a4 --- /dev/null +++ b/egs/chime5/s5/local/run_wpe.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian) +# Apache 2.0 + +. ./cmd.sh +. ./path.sh + +# Config: +nj=8 +cmd=run.pl + +. 
utils/parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: local/run_wpe.sh [options] " + echo "main options (for others, see top of script file)" + echo " --cmd # Command to run in parallel with" + echo " --nj 50 # number of jobs for parallel processing" + exit 1; +fi + +sdir=$1 +odir=$2 +array=$3 +task=`basename $sdir` +expdir=exp/wpe/${task}_${array} +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +miniconda_dir=$HOME/miniconda3/ +if [ ! -d $miniconda_dir ]; then + echo "$miniconda_dir does not exist. Please run '../../../tools/extras/install_miniconda.sh' and '../../../tools/extras/install_wpe.sh';" +fi + +# check if WPE is installed +result=`$HOME/miniconda3/bin/python -c "\ +try: + import nara_wpe + print('1') +except ImportError: + print('0')"` + +if [ "$result" == "1" ]; then + echo "WPE is installed" +else + echo "WPE is not installed. 
Please run ../../../tools/extras/install_wpe.sh" + exit 1 +fi + +mkdir -p $odir +mkdir -p $expdir/log + +# wavfiles.list can be used as the name of the output files +output_wavfiles=$expdir/wavfiles.list +find -L ${sdir} | grep -i ${array} > $expdir/channels_input +cat $expdir/channels_input | awk -F '/' '{print $NF}' | sed "s@S@$odir\/S@g" > $expdir/channels_output +paste -d" " $expdir/channels_input $expdir/channels_output > $output_wavfiles + +# split the list for parallel processing +split_wavfiles="" +for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" +done +utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + +echo -e "Dereverberation - $task - $array\n" +# making a shell script for each job +for n in `seq $nj`; do +cat <<-EOF > $expdir/log/wpe.$n.sh +while read line; do + $HOME/miniconda3/bin/python local/run_wpe.py \ + --file \$line +done < $output_wavfiles.$n +EOF +done + +chmod a+x $expdir/log/wpe.*.sh +$cmd JOB=1:$nj $expdir/log/wpe.JOB.log \ + $expdir/log/wpe.JOB.sh + +# ################# +# for task in dt et; do + # for nch in 1 2 8; do + # wdir=exp/wpe_${task}_${nch}ch + # mkdir -p $wdir/log + # arrays=$wdir/channels + # output_wavfiles=$wdir/wavfiles.list + # if [ ${nch} == 1 ]; then + # allwavs=`cat ${dir}/${task}_real_1ch_wav.scp | cut -d " " -f2` + # allwavs_output=`cat ${dir}/${task}_real_1ch_wpe_wav.scp | cut -d " " -f2` + # echo $allwavs | tr ' ' '\n' > $wdir/channels_input + # echo $allwavs_output | tr ' ' '\n' > $wdir/channels_output + # paste -d" " $wdir/channels_input $wdir/channels_output > $arrays + # elif [ ${nch} == 2 ]; then + # allwavs=`cat ${dir}/${task}_real_2ch_wav.scp | cut -d " " -f2` + # allwavs_output=`cat ${dir}/${task}_real_2ch_wpe_wav.scp | cut -d " " -f2` + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==1' > $wdir/channels.1st + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==0' > $wdir/channels.2nd + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev 
| awk 'NR%2==1' > $wdir/channels_output.1st + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%2==0' > $wdir/channels_output.2nd + # paste -d" " $wdir/channels.1st $wdir/channels.2nd $wdir/channels_output.1st $wdir/channels_output.2nd > $arrays + # elif [ ${nch} == 8 ]; then + # allwavs=`cat ${dir}/${task}_real_8ch_wav.scp | cut -d " " -f2` + # allwavs_output=`cat ${dir}/${task}_real_8ch_wpe_wav.scp | cut -d " " -f2` + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==1' > $wdir/channels.1st + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==2' > $wdir/channels.2nd + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==3' > $wdir/channels.3rd + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==4' > $wdir/channels.4th + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==5' > $wdir/channels.5th + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==6' > $wdir/channels.6th + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==7' > $wdir/channels.7th + # echo $allwavs | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==0' > $wdir/channels.8th + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==1' > $wdir/channels_output.1st + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==2' > $wdir/channels_output.2nd + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==3' > $wdir/channels_output.3rd + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==4' > $wdir/channels_output.4th + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==5' > $wdir/channels_output.5th + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==6' > $wdir/channels_output.6th + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==7' > $wdir/channels_output.7th + # echo $allwavs_output | tr ' ' '\n' | rev | sort | rev | awk 'NR%8==0' > $wdir/channels_output.8th + # paste -d" " $wdir/channels.1st 
$wdir/channels.2nd $wdir/channels.3rd $wdir/channels.4th $wdir/channels.5th $wdir/channels.6th $wdir/channels.7th $wdir/channels.8th $wdir/channels_output.1st $wdir/channels_output.2nd $wdir/channels_output.3rd $wdir/channels_output.4th $wdir/channels_output.5th $wdir/channels_output.6th $wdir/channels_output.7th $wdir/channels_output.8th > $arrays + # fi + + # # split the list for parallel processing + # split_wavfiles="" + # for n in `seq $nj`; do + # split_wavfiles="$split_wavfiles $output_wavfiles.$n" + # done + # utils/split_scp.pl $arrays $split_wavfiles || exit 1; + + # echo -e "Dereverberation - $task - real - $nch ch\n" + # # making a shell script for each job + # for n in `seq $nj`; do + # cat <<-EOF > $wdir/log/wpe.$n.sh + # while read line; do + # $HOME/miniconda3/bin/python local/run_wpe.py \ + # --file \$line + # done < $output_wavfiles.$n + # EOF + # done + + # chmod a+x $wdir/log/wpe.*.sh + # $cmd JOB=1:$nj $wdir/log/wpe.JOB.log \ + # $wdir/log/wpe.JOB.sh + # done +# done + +echo "`basename $0` Done." 
diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh index cc2cc3e558b..8e4072bf9f4 100755 --- a/egs/chime5/s5/run.sh +++ b/egs/chime5/s5/run.sh @@ -73,10 +73,20 @@ if [ $stage -le 4 ]; then # Beamforming using reference arrays # enhanced WAV directory enhandir=enhan + dereverb_dir=${PWD}/wav/wpe/ for dset in dev eval; do for mictype in u01 u02 u03 u04 u05 u06; do - local/run_beamformit.sh --cmd "$train_cmd" \ + local/run_wpe.sh --cmd "$train_cmd" \ ${audio_dir}/${dset} \ + ${dereverb_dir}/${dset} \ + ${mictype} + done + done + + for dset in dev eval; do + for mictype in u01 u02 u03 u04 u05 u06; do + local/run_beamformit.sh --cmd "$train_cmd" \ + ${dereverb_dir}/${dset} \ ${enhandir}/${dset}_${enhancement}_${mictype} \ ${mictype} done @@ -84,7 +94,7 @@ if [ $stage -le 4 ]; then for dset in dev eval; do local/prepare_data.sh --mictype ref "$PWD/${enhandir}/${dset}_${enhancement}_u0*" \ - ${json_dir}/${dset} data/${dset}_${enhancement}_ref + ${json_dir}/${dset} data/${dset}_${enhancement}_dereverb_ref done fi @@ -243,7 +253,7 @@ fi if [ $stage -le 17 ]; then rm -r data/train_worn_cleaned 2>/dev/null || true - utils/copy_data_dir.sh data/${train_set}_clean data/train_worn_cleaned + utils/copy_data_dir.sh data/${train_set}_cleaned data/train_worn_cleaned awk '{print $1}' data/train_worn/wav.scp > data/train_worn_cleaned/recos.tmp utils/filter_scp.pl data/train_worn_cleaned/recos.tmp \ @@ -252,7 +262,7 @@ if [ $stage -le 17 ]; then utils/fix_data_dir.sh data/train_worn_cleaned rm -r data/train_u400k_cleaned 2>/dev/null || true - utils/copy_data_dir.sh data/${train_set}_clean data/train_u400k_cleaned + utils/copy_data_dir.sh data/${train_set}_cleaned data/train_u400k_cleaned utils/filter_scp.pl --exclude data/train_worn_cleaned/recos.tmp \ data/${train_set}_cleaned/wav.scp > data/train_u400k_cleaned/wav.scp @@ -262,7 +272,8 @@ fi if [ $stage -le 18 ]; then # chain TDNN - local/chain/multi_condition/run_tdnn.sh --nj ${nj} \ + #local/chain/tuning/run_tdnn_rvb_1b.sh 
--nj ${nj} --train-set ${train_set}_cleaned --test-sets "$test_sets" --gmm tri3_cleaned --nnet3-affix _${train_set}_cleaned + local/chain/tuning/run_tdnn_rvb_1b.sh --stage 16 --nj ${nj} \ --train-set-clean train_worn_cleaned \ --train-set-noisy train_u400k_cleaned \ --combined-train-set ${train_set}_cleaned \ @@ -275,6 +286,6 @@ if [ $stage -le 19 ]; then # please specify both dev and eval set directories so that the search parameters # (insertion penalty and language model weight) will be tuned using the dev set local/score_for_submit.sh \ - --dev exp/chain_${train_set}_cleaned/tdnn1a_sp/decode_dev_${enhancement}_ref \ - --eval exp/chain_${train_set}_cleaned/tdnn1a_sp/decode_eval_${enhancement}_ref + --dev exp/chain_${train_set}_cleaned_rvb/tdnn_rvb_1b_sp/decode_dev_${enhancement}_ref \ + --eval exp/chain_${train_set}_cleaned_rvb/tdnn_rvb_1b_sp/decode_eval_${enhancement}_ref fi From 30235383814b2303dfbaa37bfbaf1cfeadeab765 Mon Sep 17 00:00:00 2001 From: Szu-JuiChen <31828751+Szu-JuiChen@users.noreply.github.com> Date: Wed, 9 Jan 2019 22:24:47 -0600 Subject: [PATCH 38/39] Update README.txt --- egs/reverb/s5/README.txt | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/egs/reverb/s5/README.txt b/egs/reverb/s5/README.txt index 295b3da0582..2a3770630e9 100644 --- a/egs/reverb/s5/README.txt +++ b/egs/reverb/s5/README.txt @@ -1,23 +1,11 @@ -Improved baseline for REVERB challenge based on Kaldi -============================================================================== +Improved baseline for REVERB challenge +====================================== -updated -Wed Nov 28 11:36:30 EST 2018 Szu-Jui Chen - -updated -Wed Apr 29 19:10:33 EDT 2015 Shinji Watanabe - -updated -Wed Apr 9 12:14:02 CEST 2014 Felix Weninger - -original: -Wed Nov 6 14:47:59 EST 2013 Felix Weninger +This is an improvement over "Improved multi condition training baseline" from Felix Weninger & Shinji Watanabe Key specs: -- MFCC-LDA-STC front-end(not sure) +- 
Nara-WPE and BeamformIt front-end enhancement - TDNN acoustic model -- Utterance-based adaptation using basis fMLLR -- Tri-gram LM minimum Bayes risk decoding(not sure) RESULT: For experiment results, please see RESULTS for more detail @@ -42,4 +30,3 @@ INSTRUCTIONS: Depending on your system specs (# of CPUs, RAM) you might want (or have) to change the number of parallel jobs -- this is controlled by the nj and decode_nj variables (# of jobs for training, for decoding). - From 8b0b56e909641e0dd8794c83f800092e619dba8a Mon Sep 17 00:00:00 2001 From: Szu-JuiChen <31828751+Szu-JuiChen@users.noreply.github.com> Date: Wed, 9 Jan 2019 23:50:20 -0600 Subject: [PATCH 39/39] Update cmd.sh --- egs/chime5/s5/cmd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/chime5/s5/cmd.sh b/egs/chime5/s5/cmd.sh index dfa2fab1688..0b54a2acda4 100644 --- a/egs/chime5/s5/cmd.sh +++ b/egs/chime5/s5/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="retry.pl queue.pl --mem 100G" +export train_cmd="retry.pl queue.pl --mem 120G" export decode_cmd="queue.pl --mem 4G"